NFS client updates for Linux 5.1

Highlights include:
 
 Stable fixes:
 - Fixes for NFS I/O request leakages
 - Fix error handling paths in the NFS I/O recoalescing code
 - Reinitialise NFSv4.1 sequence results before retransmitting a request
 - Fix a soft lockup in the delegation recovery code
 - Bulk destroy of layouts needs to be safe w.r.t. umount
 - Prevent thundering herd issues when the SUNRPC socket is not connected
 - Respect RPC call timeouts when retrying transmission
 
 Features:
 - Convert rpc auth layer to use xdr_streams
 - Config option to disable insecure RPCSEC_GSS crypto types
 - Reduce size of RPC receive buffers
 - Readdirplus optimization by cache mechanism
 - Convert SUNRPC socket send code to use iov_iter()
 - SUNRPC micro-optimisations to avoid indirect calls
 - Add support for the pNFS LAYOUTERROR operation and use it with the
   pNFS/flexfiles driver
 - Add trace events to report non-zero NFS status codes
 - Various removals of unnecessary dprintks
 
 Bugfixes and cleanups:
 - Fix a number of sparse warnings and documentation format warnings
 - Fix nfs_parse_devname to not modify it's argument
 - Fix potential corruption of page being written through pNFS/blocks
 - fix xfstest generic/099 failures on nfsv3
 - Avoid NFSv4.1 "false retries" when RPC calls are interrupted
 - Abort I/O early if the pNFS/flexfiles layout segment was invalidated
 - Avoid unnecessary pNFS/flexfiles layout invalidations
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJch5pDAAoJEA4mA3inWBJc8YcP/1/eyUfdkJ/OGDgXkPcgvDWd
 PzzfRuB6QdJnNCd+OIcdHMA/5v1QP+V1ubwBEIGhnbA5S5RZBaKaZR/UHMhfiqjn
 NJTFjYxE/2fqqOCqSoEEGD8or6fbxfLycfxpHo7xLjD95Hs9o4BeWDOULG3/X2v+
 ETu/wganWoUeVsMfyfeCk8FarMFoEiODcEn+iNHYz75GlmeE/BsqsjdIjO3FWUZP
 d0SoE7g6RtMF8NumXiP17cNFslWbvI6f1FKDRt8Cf3pDxpuLcHsRXYOEEEpBpKHS
 NbNKeUR9IBekUOMGp/LXF9jCaaJITq0k127NkZoVSQyaVsNQL0vM55awEQryOxkL
 Fsxi/u0y05EGtlvO+t4qhO4MJ5SywjJjepib1EyABK3/3lT9IjEW7iCGGap+DKd3
 im4Cx0bFi6wKiEmOeRpkrt72qPC9l6fySnEVnnGILFVovhCBdKftLicqfQPIo2CB
 MsUODROT4H2YklUsWJB55kjL4CgVRGFC+Nx8R6ZXi8DjO8HBfOf05RNI7tBnd/un
 /YYkJxLJR9RRqkyTbJd5nddM3FSgMBc6k5ptHDhpCIzN0Inf4U5Z7FFkRUzM2eTc
 DWFifcro9lrbaIhEMQJMIVfxHQccK0FrOU1MBbX0MY52RzHnRC0XyIm/3WDPqLzb
 43tZY1orRnTFDeIF5jSs
 =9rH/
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-5.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - Fixes for NFS I/O request leakages
   - Fix error handling paths in the NFS I/O recoalescing code
   - Reinitialise NFSv4.1 sequence results before retransmitting a
     request
   - Fix a soft lockup in the delegation recovery code
   - Bulk destroy of layouts needs to be safe w.r.t. umount
   - Prevent thundering herd issues when the SUNRPC socket is not
     connected
   - Respect RPC call timeouts when retrying transmission

  Features:
   - Convert rpc auth layer to use xdr_streams
   - Config option to disable insecure RPCSEC_GSS crypto types
   - Reduce size of RPC receive buffers
   - Readdirplus optimization by cache mechanism
   - Convert SUNRPC socket send code to use iov_iter()
   - SUNRPC micro-optimisations to avoid indirect calls
   - Add support for the pNFS LAYOUTERROR operation and use it with the
     pNFS/flexfiles driver
   - Add trace events to report non-zero NFS status codes
   - Various removals of unnecessary dprintks

  Bugfixes and cleanups:
   - Fix a number of sparse warnings and documentation format warnings
   - Fix nfs_parse_devname to not modify its argument
   - Fix potential corruption of page being written through pNFS/blocks
   - Fix xfstests generic/099 failures on NFSv3
   - Avoid NFSv4.1 "false retries" when RPC calls are interrupted
   - Abort I/O early if the pNFS/flexfiles layout segment was
     invalidated
   - Avoid unnecessary pNFS/flexfiles layout invalidations"

* tag 'nfs-for-5.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (90 commits)
  SUNRPC: Take the transport send lock before binding+connecting
  SUNRPC: Micro-optimise when the task is known not to be sleeping
  SUNRPC: Check whether the task was transmitted before rebind/reconnect
  SUNRPC: Remove redundant calls to RPC_IS_QUEUED()
  SUNRPC: Clean up
  SUNRPC: Respect RPC call timeouts when retrying transmission
  SUNRPC: Fix up RPC back channel transmission
  SUNRPC: Prevent thundering herd when the socket is not connected
  SUNRPC: Allow dynamic allocation of back channel slots
  NFSv4.1: Bump the default callback session slot count to 16
  SUNRPC: Convert remaining GFP_NOIO, and GFP_NOWAIT sites in sunrpc
  NFS/flexfiles: Clean up mirror DS initialisation
  NFS/flexfiles: Remove dead code in ff_layout_mirror_valid()
  NFS/flexfile: Simplify nfs4_ff_layout_select_ds_stateid()
  NFS/flexfile: Simplify nfs4_ff_layout_ds_version()
  NFS/flexfiles: Simplify ff_layout_get_ds_cred()
  NFS/flexfiles: Simplify nfs4_ff_find_or_create_ds_client()
  NFS/flexfiles: Simplify nfs4_ff_layout_select_ds_fh()
  NFS/flexfiles: Speed up read failover when DSes are down
  NFS/flexfiles: Don't invalidate DS deviceids for being unresponsive
  ...
This commit is contained in:
Linus Torvalds 2019-03-12 14:50:42 -07:00
commit 1fbf3e4812
83 changed files with 3368 additions and 2205 deletions

View File

@ -74,17 +74,6 @@ static void nlm4_compute_offsets(const struct nlm_lock *lock,
*l_len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
}
/*
* Handle decode buffer overflows out-of-line.
*/
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
{
dprintk("lockd: %s prematurely hit the end of our receive buffer. "
"Remaining buffer length is %tu words.\n",
func, xdr->end - xdr->p);
}
/*
* Encode/decode NLMv4 basic data types
*
@ -176,7 +165,6 @@ static int decode_cookie(struct xdr_stream *xdr,
dprintk("NFS: returned cookie was too long: %u\n", length);
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
@ -236,7 +224,6 @@ static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
__func__, be32_to_cpup(p));
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
@ -309,7 +296,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
out:
return error;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}

View File

@ -70,17 +70,6 @@ static void nlm_compute_offsets(const struct nlm_lock *lock,
*l_len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
}
/*
* Handle decode buffer overflows out-of-line.
*/
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
{
dprintk("lockd: %s prematurely hit the end of our receive buffer. "
"Remaining buffer length is %tu words.\n",
func, xdr->end - xdr->p);
}
/*
* Encode/decode NLMv3 basic data types
*
@ -173,7 +162,6 @@ static int decode_cookie(struct xdr_stream *xdr,
dprintk("NFS: returned cookie was too long: %u\n", length);
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
@ -231,7 +219,6 @@ static int decode_nlm_stat(struct xdr_stream *xdr,
__func__, be32_to_cpup(p));
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
@ -303,7 +290,6 @@ static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
out:
return error;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}

View File

@ -72,16 +72,6 @@ static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
return xdr_ressize_check(rqstp, p);
}
static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p;
p = xdr_inline_decode(xdr, nbytes);
if (unlikely(p == NULL))
printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
return p;
}
static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
const char **str, size_t maxlen)
{
@ -98,13 +88,13 @@ static __be32 decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
{
__be32 *p;
p = read_buf(xdr, 4);
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
fh->size = ntohl(*p);
if (fh->size > NFS4_FHSIZE)
return htonl(NFS4ERR_BADHANDLE);
p = read_buf(xdr, fh->size);
p = xdr_inline_decode(xdr, fh->size);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(&fh->data[0], p, fh->size);
@ -117,11 +107,11 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
__be32 *p;
unsigned int attrlen;
p = read_buf(xdr, 4);
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
attrlen = ntohl(*p);
p = read_buf(xdr, attrlen << 2);
p = xdr_inline_decode(xdr, attrlen << 2);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
if (likely(attrlen > 0))
@ -135,7 +125,7 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
__be32 *p;
p = read_buf(xdr, NFS4_STATEID_SIZE);
p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(stateid->data, p, NFS4_STATEID_SIZE);
@ -156,7 +146,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
status = decode_string(xdr, &hdr->taglen, &hdr->tag, CB_OP_TAGLEN_MAXSZ);
if (unlikely(status != 0))
return status;
p = read_buf(xdr, 12);
p = xdr_inline_decode(xdr, 12);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
hdr->minorversion = ntohl(*p++);
@ -176,7 +166,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
{
__be32 *p;
p = read_buf(xdr, 4);
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE_HDR);
*op = ntohl(*p);
@ -205,7 +195,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp,
status = decode_delegation_stateid(xdr, &args->stateid);
if (unlikely(status != 0))
return status;
p = read_buf(xdr, 4);
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
args->truncate = ntohl(*p);
@ -227,7 +217,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
__be32 status = 0;
uint32_t iomode;
p = read_buf(xdr, 4 * sizeof(uint32_t));
p = xdr_inline_decode(xdr, 4 * sizeof(uint32_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
@ -245,14 +235,14 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
if (unlikely(status != 0))
return status;
p = read_buf(xdr, 2 * sizeof(uint64_t));
p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbl_range.offset);
p = xdr_decode_hyper(p, &args->cbl_range.length);
return decode_layout_stateid(xdr, &args->cbl_stateid);
} else if (args->cbl_recall_type == RETURN_FSID) {
p = read_buf(xdr, 2 * sizeof(uint64_t));
p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbl_fsid.major);
@ -275,7 +265,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
args->ndevs = 0;
/* Num of device notifications */
p = read_buf(xdr, sizeof(uint32_t));
p = xdr_inline_decode(xdr, sizeof(uint32_t));
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
goto out;
@ -298,7 +288,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
for (i = 0; i < n; i++) {
struct cb_devicenotifyitem *dev = &args->devs[i];
p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE);
p = xdr_inline_decode(xdr, (4 * sizeof(uint32_t)) +
NFS4_DEVICEID4_SIZE);
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
goto err;
@ -329,7 +320,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) {
p = read_buf(xdr, sizeof(uint32_t));
p = xdr_inline_decode(xdr, sizeof(uint32_t));
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
goto err;
@ -359,7 +350,7 @@ static __be32 decode_sessionid(struct xdr_stream *xdr,
{
__be32 *p;
p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN);
p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
@ -379,13 +370,13 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
goto out;
status = htonl(NFS4ERR_RESOURCE);
p = read_buf(xdr, sizeof(uint32_t));
p = xdr_inline_decode(xdr, sizeof(uint32_t));
if (unlikely(p == NULL))
goto out;
rc_list->rcl_nrefcalls = ntohl(*p++);
if (rc_list->rcl_nrefcalls) {
p = read_buf(xdr,
p = xdr_inline_decode(xdr,
rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
if (unlikely(p == NULL))
goto out;
@ -418,7 +409,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
if (status)
return status;
p = read_buf(xdr, 5 * sizeof(uint32_t));
p = xdr_inline_decode(xdr, 5 * sizeof(uint32_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
@ -461,7 +452,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
uint32_t bitmap[2];
__be32 *p, status;
p = read_buf(xdr, 4);
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
args->craa_objs_to_keep = ntohl(*p++);
@ -480,7 +471,7 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
struct cb_recallslotargs *args = argp;
__be32 *p;
p = read_buf(xdr, 4);
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
args->crsa_target_highest_slotid = ntohl(*p++);
@ -492,14 +483,14 @@ static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_arg
__be32 *p;
unsigned int len;
p = read_buf(xdr, 12);
p = xdr_inline_decode(xdr, 12);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbnl_owner.clientid);
len = be32_to_cpu(*p);
p = read_buf(xdr, len);
p = xdr_inline_decode(xdr, len);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
@ -537,7 +528,7 @@ static __be32 decode_write_response(struct xdr_stream *xdr,
__be32 *p;
/* skip the always zero field */
p = read_buf(xdr, 4);
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out;
p++;
@ -577,7 +568,7 @@ static __be32 decode_offload_args(struct svc_rqst *rqstp,
return status;
/* decode status */
p = read_buf(xdr, 4);
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out;
args->error = ntohl(*p++);
@ -943,10 +934,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
};
unsigned int nops = 0;
xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
xdr_init_decode(&xdr_in, &rqstp->rq_arg,
rqstp->rq_arg.head[0].iov_base, NULL);
p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
xdr_init_encode(&xdr_out, &rqstp->rq_res, p, NULL);
status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
if (status == htonl(NFS4ERR_RESOURCE))

View File

@ -229,6 +229,8 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
spin_lock(&delegation->lock);
if (delegation->inode != NULL)
inode = igrab(delegation->inode);
if (!inode)
set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
spin_unlock(&delegation->lock);
return inode;
}
@ -681,7 +683,7 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
/**
* nfs_server_return_all_delegations - return delegations for one superblock
* @sb: sb to process
* @server: pointer to nfs_server to process
*
*/
void nfs_server_return_all_delegations(struct nfs_server *server)
@ -944,10 +946,11 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
if (test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags))
continue;
if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags) == 0)
continue;
if (!nfs_sb_active(server->super))
@ -1053,10 +1056,11 @@ void nfs_reap_expired_delegations(struct nfs_client *clp)
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
if (test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags))
continue;
if (test_bit(NFS_DELEGATION_TEST_EXPIRED,
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_TEST_EXPIRED,
&delegation->flags) == 0)
continue;
if (!nfs_sb_active(server->super))

View File

@ -34,6 +34,7 @@ enum {
NFS_DELEGATION_RETURNING,
NFS_DELEGATION_REVOKED,
NFS_DELEGATION_TEST_EXPIRED,
NFS_DELEGATION_INODE_FREEING,
};
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,

View File

@ -139,12 +139,19 @@ struct nfs_cache_array {
struct nfs_cache_array_entry array[0];
};
/*
* Batch of preallocated readdir pages carried in the readdir descriptor.
*/
struct readdirvec {
/* Count of pages filled so far; bumped when adding an entry hits -ENOSPC. */
unsigned long nr;
/* Set from desc->page_index before filling; the range check against */
/* page_index treats it as the page index that pages[0] stands for. */
unsigned long index;
/* The preallocated pages, NFS_MAX_READDIR_RAPAGES of them. */
struct page *pages[NFS_MAX_READDIR_RAPAGES];
};
typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
typedef struct {
struct file *file;
struct page *page;
struct dir_context *ctx;
unsigned long page_index;
struct readdirvec pvec;
u64 *dir_cookie;
u64 last_cookie;
loff_t current_index;
@ -524,6 +531,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
struct nfs_cache_array *array;
unsigned int count = 0;
int status;
int max_rapages = NFS_MAX_READDIR_RAPAGES;
desc->pvec.index = desc->page_index;
desc->pvec.nr = 0;
scratch = alloc_page(GFP_KERNEL);
if (scratch == NULL)
@ -548,20 +559,40 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
if (desc->plus)
nfs_prime_dcache(file_dentry(desc->file), entry);
status = nfs_readdir_add_to_array(entry, page);
status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
if (status == -ENOSPC) {
desc->pvec.nr++;
if (desc->pvec.nr == max_rapages)
break;
status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
}
if (status != 0)
break;
} while (!entry->eof);
/*
* page and desc->pvec.pages[0] are valid, don't need to check
* whether or not to be NULL.
*/
copy_highpage(page, desc->pvec.pages[0]);
out_nopages:
if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
array = kmap(page);
array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]);
array->eof_index = array->size;
status = 0;
kunmap(page);
kunmap_atomic(array);
}
put_page(scratch);
/*
* desc->pvec.nr > 0 means at least one page was completely filled,
* we should return -ENOSPC. Otherwise function
* nfs_readdir_xdr_to_array will enter infinite loop.
*/
if (desc->pvec.nr > 0)
return -ENOSPC;
return status;
}
@ -574,8 +605,8 @@ void nfs_readdir_free_pages(struct page **pages, unsigned int npages)
}
/*
* nfs_readdir_large_page will allocate pages that must be freed with a call
* to nfs_readdir_free_pagearray
* nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
* to nfs_readdir_free_pages()
*/
static
int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
@ -595,6 +626,24 @@ int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
return -ENOMEM;
}
/*
* nfs_readdir_rapages_init() resets every preallocated readdir page.
*
* For each of the NFS_MAX_READDIR_RAPAGES pages in desc->pvec.pages, the
* struct nfs_cache_array header at the start of the page is zeroed and
* its eof_index is set to -1.
*/
static
void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc)
{
struct nfs_cache_array *array;
int max_rapages = NFS_MAX_READDIR_RAPAGES;
int index;
for (index = 0; index < max_rapages; index++) {
array = kmap_atomic(desc->pvec.pages[index]);
/* Only the array header is cleared, not the whole page. */
memset(array, 0, sizeof(struct nfs_cache_array));
/* NOTE(review): -1 presumably means "EOF not seen yet" - confirm. */
array->eof_index = -1;
kunmap_atomic(array);
}
}
static
int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
{
@ -605,6 +654,12 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
int status = -ENOMEM;
unsigned int array_size = ARRAY_SIZE(pages);
/*
* Getting here means the readdir rapages lookup missed: the contents
* of the preallocated rapages are useless, so reinitialize them.
*/
nfs_readdir_rapages_init(desc);
entry.prev_cookie = 0;
entry.cookie = desc->last_cookie;
entry.eof = 0;
@ -664,9 +719,24 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
struct inode *inode = file_inode(desc->file);
int ret;
ret = nfs_readdir_xdr_to_array(desc, page, inode);
if (ret < 0)
goto error;
/*
* If desc->page_index in range desc->pvec.index and
* desc->pvec.index + desc->pvec.nr, we get readdir cache hit.
*/
if (desc->page_index >= desc->pvec.index &&
desc->page_index < (desc->pvec.index + desc->pvec.nr)) {
/*
* page and desc->pvec.pages[x] are valid, don't need to check
* whether or not to be NULL.
*/
copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]);
ret = 0;
} else {
ret = nfs_readdir_xdr_to_array(desc, page, inode);
if (ret < 0)
goto error;
}
SetPageUptodate(page);
if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
@ -831,6 +901,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
*desc = &my_desc;
struct nfs_open_dir_context *dir_ctx = file->private_data;
int res = 0;
int max_rapages = NFS_MAX_READDIR_RAPAGES;
dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
file, (long long)ctx->pos);
@ -850,6 +921,12 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->decode = NFS_PROTO(inode)->decode_dirent;
desc->plus = nfs_use_readdirplus(inode, ctx);
res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages);
if (res < 0)
return -ENOMEM;
nfs_readdir_rapages_init(desc);
if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
res = nfs_revalidate_mapping(inode, file->f_mapping);
if (res < 0)
@ -885,6 +962,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
break;
} while (!desc->eof);
out:
nfs_readdir_free_pages(desc->pvec.pages, max_rapages);
if (res > 0)
res = 0;
dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
@ -945,7 +1023,7 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
/**
* nfs_force_lookup_revalidate - Mark the directory as having changed
* @dir - pointer to directory inode
* @dir: pointer to directory inode
*
* This forces the revalidation code in nfs_lookup_revalidate() to do a
* full lookup on all child dentries of 'dir' whenever a change occurs
@ -1649,7 +1727,7 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
reval_dentry:
if (flags & LOOKUP_RCU)
return -ECHILD;
return nfs_lookup_revalidate_dentry(dir, dentry, inode);;
return nfs_lookup_revalidate_dentry(dir, dentry, inode);
full_reval:
return nfs_do_lookup_revalidate(dir, dentry, flags);

View File

@ -428,7 +428,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
hdr->release(hdr);
}
static void nfs_read_sync_pgio_error(struct list_head *head)
static void nfs_read_sync_pgio_error(struct list_head *head, int error)
{
struct nfs_page *req;
@ -664,8 +664,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
if (!nfs_pageio_add_request(&desc, req)) {
nfs_list_remove_request(req);
nfs_list_add_request(req, &failed);
nfs_list_move_request(req, &failed);
spin_lock(&cinfo.inode->i_lock);
dreq->flags = 0;
if (desc.pg_error < 0)
@ -821,7 +820,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
hdr->release(hdr);
}
static void nfs_write_sync_pgio_error(struct list_head *head)
static void nfs_write_sync_pgio_error(struct list_head *head, int error)
{
struct nfs_page *req;

View File

@ -89,8 +89,8 @@ EXPORT_SYMBOL_GPL(nfs_file_release);
/**
* nfs_revalidate_size - Revalidate the file size
* @inode - pointer to inode struct
* @file - pointer to struct file
* @inode: pointer to inode struct
* @filp: pointer to struct file
*
* Revalidates the file length. This is basically a wrapper around
* nfs_revalidate_inode() that takes into account the fact that we may
@ -276,6 +276,12 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
* then a modify/write/read cycle when writing to a page in the
* page cache.
*
* Some pNFS layout drivers can only read/write at a certain block
* granularity like all block devices and therefore we must perform
* read/modify/write whenever a page hasn't been read yet and the data
* to be written there is not aligned to a block boundary and/or
* smaller than the block size.
*
* The modify/write/read cycle may occur if a page is read before
* being completely filled by the writer. In this situation, the
* page must be completely written to stable storage on the server
@ -291,26 +297,32 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
* and that the new data won't completely replace the old data in
* that range of the file.
*/
static int nfs_want_read_modify_write(struct file *file, struct page *page,
loff_t pos, unsigned len)
static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len)
{
unsigned int pglen = nfs_page_length(page);
unsigned int offset = pos & (PAGE_SIZE - 1);
unsigned int end = offset + len;
if (pnfs_ld_read_whole_page(file->f_mapping->host)) {
if (!PageUptodate(page))
return 1;
return 0;
}
return !pglen || (end >= pglen && !offset);
}
if ((file->f_mode & FMODE_READ) && /* open for read? */
!PageUptodate(page) && /* Uptodate? */
!PagePrivate(page) && /* i/o request already? */
pglen && /* valid bytes of file? */
(end < pglen || offset)) /* replace all valid bytes? */
return 1;
return 0;
static bool nfs_want_read_modify_write(struct file *file, struct page *page,
loff_t pos, unsigned int len)
{
/*
* Up-to-date pages, those with ongoing or full-page write
* don't need read/modify/write
*/
if (PageUptodate(page) || PagePrivate(page) ||
nfs_full_page_write(page, pos, len))
return false;
if (pnfs_ld_read_whole_page(file->f_mapping->host))
return true;
/* Open for reading too? */
if (file->f_mode & FMODE_READ)
return true;
return false;
}
/*

View File

@ -410,7 +410,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
for (i = 0; i < fls->mirror_array_cnt; i++) {
struct nfs4_ff_layout_mirror *mirror;
struct cred *kcred;
const struct cred *cred;
const struct cred __rcu *cred;
kuid_t uid;
kgid_t gid;
u32 ds_count, fh_count, id;
@ -501,7 +501,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
goto out_err_free;
kcred->fsuid = uid;
kcred->fsgid = gid;
cred = kcred;
cred = RCU_INITIALIZER(kcred);
if (lgr->range.iomode == IOMODE_READ)
rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
@ -788,30 +788,82 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
}
}
/*
* Mark the device ID node of mirror @idx in @lseg as unavailable.
* Does nothing when FF_LAYOUT_DEVID_NODE() yields no node for that mirror.
*/
static void
ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
if (devid)
nfs4_mark_deviceid_unavailable(devid);
}
/*
* Mark the device ID node of mirror @idx in @lseg as available again.
* Does nothing when FF_LAYOUT_DEVID_NODE() yields no node for that mirror.
*/
static void
ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
if (devid)
nfs4_mark_deviceid_available(devid);
}
static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
int start_idx,
int *best_idx)
ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
int start_idx, int *best_idx,
bool check_device)
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
bool fail_return = false;
int idx;
/* mirrors are sorted by efficiency */
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
if (idx+1 == fls->mirror_array_cnt)
fail_return = true;
ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
if (ds) {
*best_idx = idx;
return ds;
}
fail_return = !check_device;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
if (!ds)
continue;
if (check_device &&
nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
continue;
*best_idx = idx;
return ds;
}
return NULL;
}
/*
* Pick a data server for reading, starting the mirror scan at @start_idx,
* without checking whether the mirror's device ID is marked unavailable
* (check_device is passed as false).
*
* Returns whatever ff_layout_choose_ds_for_read() returns; @best_idx is
* handed through to it unchanged.
*/
static struct nfs4_pnfs_ds *
ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
int start_idx, int *best_idx)
{
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
}
/*
* Pick a data server for reading, starting the mirror scan at @start_idx,
* with the device check enabled (check_device is passed as true).
*
* Returns whatever ff_layout_choose_ds_for_read() returns; @best_idx is
* handed through to it unchanged.
*/
static struct nfs4_pnfs_ds *
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
int start_idx, int *best_idx)
{
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
}
/*
* Pick a data server for reading in two passes over the mirrors from
* @start_idx: first with the device check enabled, then, only if that
* pass found nothing, with the check disabled.
*
* Returns the result of whichever pass is used; the second pass may
* itself return NULL.
*/
static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
int start_idx, int *best_idx)
{
struct nfs4_pnfs_ds *ds;
/* First pass: device check enabled. */
ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
if (ds)
return ds;
/* Fallback pass: device check disabled. */
return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
}
static void
ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req,
@ -925,7 +977,8 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
goto out_mds;
for (i = 0; i < pgio->pg_mirror_count; i++) {
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
if (!ds) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
@ -936,7 +989,6 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
goto retry;
}
pgm = &pgio->pg_mirrors[i];
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
}
@ -1071,6 +1123,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
break;
case -NFS4ERR_RETRY_UNCACHED_REP:
break;
case -EAGAIN:
return -NFS4ERR_RESET_TO_PNFS;
/* Invalidate Layout errors */
case -NFS4ERR_PNFS_NO_LAYOUT:
case -ESTALE: /* mapped NFS4ERR_STALE */
@ -1131,6 +1185,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
case -EBADHANDLE:
case -ELOOP:
case -ENOSPC:
case -EAGAIN:
break;
case -EJUKEBOX:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
@ -1158,8 +1213,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
{
int vers = clp->cl_nfs_mod->rpc_vers->number;
if (task->tk_status >= 0)
if (task->tk_status >= 0) {
ff_layout_mark_ds_reachable(lseg, idx);
return 0;
}
/* Handle the case of an invalid layout segment */
if (!pnfs_is_valid_lseg(lseg))
@ -1222,6 +1279,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, offset, length, status, opnum,
GFP_NOIO);
if (status == NFS4ERR_NXIO)
ff_layout_mark_ds_unreachable(lseg, idx);
pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
}
@ -1249,7 +1308,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
if (ff_layout_choose_best_ds_for_read(hdr->lseg,
hdr->pgio_mirror_idx + 1,
&hdr->pgio_mirror_idx))
goto out_eagain;
goto out_layouterror;
set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
return task->tk_status;
case -NFS4ERR_RESET_TO_MDS:
@ -1260,6 +1319,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
}
return 0;
out_layouterror:
ff_layout_send_layouterror(hdr->lseg);
out_eagain:
rpc_restart_call_prepare(task);
return -EAGAIN;
@ -1293,15 +1354,6 @@ ff_layout_set_layoutcommit(struct inode *inode,
(unsigned long long) NFS_I(inode)->layout->plh_lwb);
}
static bool
ff_layout_device_unavailable(struct pnfs_layout_segment *lseg, int idx)
{
/* No mirroring for now */
struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx);
return ff_layout_test_devid_unavailable(node);
}
static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
@ -1332,10 +1384,6 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
rpc_exit(task, -EIO);
return -EIO;
}
if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
rpc_exit(task, -EHOSTDOWN);
return -EAGAIN;
}
ff_layout_read_record_layoutstats_start(task, hdr);
return 0;
@ -1369,6 +1417,16 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
ff_layout_read_prepare_common(task, hdr);
}
/*
* rpc_call_prepare_transmit callback for flexfiles I/O.
* @task: RPC task being prepared
* @data: the struct nfs_pgio_header for this I/O
*
* Exits the task with -EAGAIN when pnfs_is_valid_lseg() reports that the
* header's layout segment is no longer valid; otherwise does nothing.
*/
static void
ff_layout_io_prepare_transmit(struct rpc_task *task,
void *data)
{
struct nfs_pgio_header *hdr = data;
if (!pnfs_is_valid_lseg(hdr->lseg))
rpc_exit(task, -EAGAIN);
}
static void ff_layout_read_call_done(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
@ -1399,9 +1457,10 @@ static void ff_layout_read_release(void *data)
struct nfs_pgio_header *hdr = data;
ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
ff_layout_send_layouterror(hdr->lseg);
pnfs_read_resend_pnfs(hdr);
else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
} else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
ff_layout_reset_read(hdr);
pnfs_generic_rw_release(data);
}
@ -1513,11 +1572,6 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
return -EIO;
}
if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
rpc_exit(task, -EHOSTDOWN);
return -EAGAIN;
}
ff_layout_write_record_layoutstats_start(task, hdr);
return 0;
}
@ -1573,9 +1627,10 @@ static void ff_layout_write_release(void *data)
struct nfs_pgio_header *hdr = data;
ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
ff_layout_send_layouterror(hdr->lseg);
ff_layout_reset_write(hdr, true);
else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
} else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
ff_layout_reset_write(hdr, false);
pnfs_generic_rw_release(data);
}
@ -1657,6 +1712,7 @@ static void ff_layout_commit_release(void *data)
static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
.rpc_call_prepare = ff_layout_read_prepare_v3,
.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
.rpc_call_done = ff_layout_read_call_done,
.rpc_count_stats = ff_layout_read_count_stats,
.rpc_release = ff_layout_read_release,
@ -1664,6 +1720,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
.rpc_call_prepare = ff_layout_read_prepare_v4,
.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
.rpc_call_done = ff_layout_read_call_done,
.rpc_count_stats = ff_layout_read_count_stats,
.rpc_release = ff_layout_read_release,
@ -1671,6 +1728,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
.rpc_call_prepare = ff_layout_write_prepare_v3,
.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
.rpc_call_done = ff_layout_write_call_done,
.rpc_count_stats = ff_layout_write_count_stats,
.rpc_release = ff_layout_write_release,
@ -1678,6 +1736,7 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
.rpc_call_prepare = ff_layout_write_prepare_v4,
.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
.rpc_call_done = ff_layout_write_call_done,
.rpc_count_stats = ff_layout_write_count_stats,
.rpc_release = ff_layout_write_release,
@ -1703,6 +1762,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
u32 idx = hdr->pgio_mirror_idx;
@ -1713,20 +1773,21 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
__func__, hdr->inode->i_ino,
hdr->args.pgbase, (size_t)hdr->args.count, offset);
ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
if (!ds)
goto out_failed;
ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
if (IS_ERR(ds_clnt))
goto out_failed;
ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
if (!ds_cred)
goto out_failed;
vers = nfs4_ff_layout_ds_version(lseg, idx);
vers = nfs4_ff_layout_ds_version(mirror);
dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
@ -1734,13 +1795,11 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
hdr->pgio_done_cb = ff_layout_read_done_cb;
refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
fh = nfs4_ff_layout_select_ds_fh(mirror);
if (fh)
hdr->args.fh = fh;
if (vers == 4 &&
!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
goto out_failed;
nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
/*
* Note that if we ever decide to split across DSes,
@ -1770,26 +1829,28 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
int vers;
struct nfs_fh *fh;
int idx = hdr->pgio_mirror_idx;
ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
if (!ds)
goto out_failed;
ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
if (IS_ERR(ds_clnt))
goto out_failed;
ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
if (!ds_cred)
goto out_failed;
vers = nfs4_ff_layout_ds_version(lseg, idx);
vers = nfs4_ff_layout_ds_version(mirror);
dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
@ -1800,13 +1861,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
hdr->ds_commit_idx = idx;
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
fh = nfs4_ff_layout_select_ds_fh(mirror);
if (fh)
hdr->args.fh = fh;
if (vers == 4 &&
!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
goto out_failed;
nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
/*
* Note that if we ever decide to split across DSes,
@ -1849,6 +1908,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
u32 idx;
int vers, ret;
@ -1859,20 +1919,21 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
goto out_err;
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
if (!ds)
goto out_err;
ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
data->inode);
if (IS_ERR(ds_clnt))
goto out_err;
ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred);
if (!ds_cred)
goto out_err;
vers = nfs4_ff_layout_ds_version(lseg, idx);
vers = nfs4_ff_layout_ds_version(mirror);
dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
@ -2036,7 +2097,7 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr,
dprintk("%s: Begin\n", __func__);
xdr_init_encode(&tmp_xdr, &tmp_buf, NULL);
xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL);
ff_layout_encode_ioerr(&tmp_xdr, args, ff_args);
ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args);
@ -2102,6 +2163,52 @@ ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args)
return -ENOMEM;
}
#ifdef CONFIG_NFS_V4_2
/*
 * ff_layout_send_layouterror - report recorded DS I/O errors for a segment
 * @lseg: layout segment whose range selects the errors to report
 *
 * Does nothing unless the server advertises NFS_CAP_LAYOUTERROR.  The
 * errors fetched for @lseg's range are copied into a scratch array and
 * handed to nfs42_proc_layouterror() in batches of at most
 * NFS42_LAYOUTERROR_MAX entries.  Batching stops at the first call that
 * returns a negative value.  The fetched entries are released with
 * ff_layout_free_ds_ioerr() afterwards, including when the scratch
 * array could not be allocated.
 */
void
ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo = lseg->pls_layout;
	const struct nfs4_ff_layout_ds_err *dserr;
	struct nfs42_layout_error *batch;
	size_t count = 0;
	LIST_HEAD(head);

	if (!nfs_server_capable(lo->plh_inode, NFS_CAP_LAYOUTERROR))
		return;
	ff_layout_fetch_ds_ioerr(lo, &lseg->pls_range, &head, -1);
	if (list_empty(&head))
		return;

	batch = kmalloc_array(NFS42_LAYOUTERROR_MAX,
			sizeof(*batch), GFP_NOFS);
	if (batch == NULL)
		goto out_free_list;

	list_for_each_entry(dserr, &head, list) {
		struct nfs42_layout_error *slot = &batch[count++];

		slot->offset = dserr->offset;
		slot->length = dserr->length;
		nfs4_stateid_copy(&slot->stateid, &dserr->stateid);
		slot->errors[0].dev_id = dserr->deviceid;
		slot->errors[0].status = dserr->status;
		slot->errors[0].opnum = dserr->opnum;

		/* Keep filling until the batch is full or the list ends */
		if (count < NFS42_LAYOUTERROR_MAX &&
		    !list_is_last(&dserr->list, &head))
			continue;
		if (nfs42_proc_layouterror(lseg, batch, count) < 0)
			break;
		count = 0;
	}
	kfree(batch);
out_free_list:
	ff_layout_free_ds_ioerr(&head);
}
#else
/* Stub built when CONFIG_NFS_V4_2 is not set: nothing is reported. */
void
ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
{
}
#endif
static int
ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
{

View File

@ -132,16 +132,6 @@ FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg)
generic_hdr);
}
/*
 * Return the device-id node of mirror @idx in @lseg, or NULL when @idx is
 * out of range, the mirror slot is empty, or the mirror has no mirror_ds.
 */
static inline struct nfs4_deviceid_node *
FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	struct nfs4_ff_layout_mirror *mirror;

	if (idx >= fls->mirror_array_cnt)
		return NULL;
	mirror = fls->mirror_array[idx];
	if (mirror == NULL || mirror->mirror_ds == NULL)
		return NULL;
	return &mirror->mirror_ds->id_node;
}
static inline struct nfs4_ff_layout_ds *
FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node)
{
@ -151,9 +141,25 @@ FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node)
static inline struct nfs4_ff_layout_mirror *
FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx)
{
if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt)
return NULL;
return FF_LAYOUT_LSEG(lseg)->mirror_array[idx];
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
if (idx < fls->mirror_array_cnt)
return fls->mirror_array[idx];
return NULL;
}
/*
 * Return the device-id node of mirror @idx in @lseg.  NULL is returned
 * when FF_LAYOUT_COMP() yields no mirror, or when the mirror's mirror_ds
 * is NULL or an error pointer.
 */
static inline struct nfs4_deviceid_node *
FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx)
{
	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, idx);
	struct nfs4_ff_layout_ds *ds_entry;

	if (mirror == NULL)
		return NULL;
	ds_entry = mirror->mirror_ds;
	if (IS_ERR_OR_NULL(ds_entry))
		return NULL;
	return &ds_entry->id_node;
}
static inline u32
@ -174,28 +180,10 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_READ_IO;
}
/*
 * Wrapper around nfs4_test_deviceid_unavailable() that also logs a
 * ratelimited warning, printing the first four 32-bit words of the
 * device id, whenever @node tests as unavailable.
 *
 * Returns true if the device is unavailable, false otherwise.
 */
static inline bool
ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
{
	u32 *words;

	if (!nfs4_test_deviceid_unavailable(node))
		return false;
	/*
	 * Flexfiles should never mark a DS unavailable, but if it does
	 * print a (ratelimited) warning as this can affect performance.
	 */
	words = (u32 *)node->deviceid.data;
	pr_warn_ratelimited("NFS: flexfiles layout referencing an "
			    "unavailable device [%x%x%x%x]\n",
			    words[0], words[1], words[2], words[3]);
	return true;
}
static inline int
nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx)
nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror)
{
return FF_LAYOUT_COMP(lseg, ds_idx)->mirror_ds->ds_versions[0].version;
return mirror->mirror_ds->ds_versions[0].version;
}
struct nfs4_ff_layout_ds *
@ -207,6 +195,7 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
struct nfs4_ff_layout_mirror *mirror, u64 offset,
u64 length, int status, enum nfs_opnum4 opnum,
gfp_t gfp_flags);
void ff_layout_send_layouterror(struct pnfs_layout_segment *lseg);
int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head);
void ff_layout_free_ds_ioerr(struct list_head *head);
unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
@ -214,23 +203,23 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
struct list_head *head,
unsigned int maxnum);
struct nfs_fh *
nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx);
int
nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
u32 mirror_idx,
nfs4_stateid *stateid);
nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror);
void
nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
nfs4_stateid *stateid);
struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_mirror *mirror,
bool fail_return);
struct rpc_clnt *
nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg,
u32 ds_idx,
nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
struct nfs_client *ds_clp,
struct inode *inode);
const struct cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,
u32 ds_idx, const struct cred *mdscred);
const struct cred *ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
const struct pnfs_layout_range *range,
const struct cred *mdscred);
bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg);
bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg);

View File

@ -183,56 +183,6 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
return NULL;
}
/*
 * Delete @devid via nfs4_delete_deviceid(), then mark @lseg's layout for
 * return if ff_layout_has_available_ds() reports that no data server is
 * left available for the segment.
 */
static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg,
		struct nfs4_deviceid_node *devid)
{
	nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid);
	if (ff_layout_has_available_ds(lseg))
		return;
	pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
}
/*
 * ff_layout_mirror_valid - check that a mirror has a usable data server
 * @lseg: layout segment the mirror belongs to
 * @mirror: mirror to validate (may be NULL)
 * @create: when true and mirror->mirror_ds is still NULL, look the device
 *          up with nfs4_find_get_deviceid() and install the result
 *
 * Returns true only when mirror->mirror_ds is a valid pointer whose ->ds
 * is non-NULL.  Every failure that reaches the outerr label marks the
 * layout for return; a mirror_ds with a NULL ->ds is instead handed to
 * ff_layout_mark_devid_invalid().
 */
static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_mirror *mirror,
bool create)
{
/* No mirror, or an error pointer already stored in mirror_ds */
if (mirror == NULL || IS_ERR(mirror->mirror_ds))
goto outerr;
if (mirror->mirror_ds == NULL) {
if (create) {
struct nfs4_deviceid_node *node;
struct pnfs_layout_hdr *lh = lseg->pls_layout;
/* Default to -ENODEV so a failed lookup is recorded as an error */
struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);
node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode),
&mirror->devid, lh->plh_lc_cred,
GFP_KERNEL);
if (node)
mirror_ds = FF_LAYOUT_MIRROR_DS(node);
/* check for race with another call to this function */
/*
 * cmpxchg() returns the previous value: non-NULL means mirror_ds was
 * already set and ours was not installed, so release the node we just
 * looked up (skipped when the lookup itself failed).
 */
if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) &&
mirror_ds != ERR_PTR(-ENODEV))
nfs4_put_deviceid_node(node);
} else
goto outerr;
}
/* Re-check: the value installed above may be ERR_PTR(-ENODEV) */
if (IS_ERR(mirror->mirror_ds))
goto outerr;
if (mirror->mirror_ds->ds == NULL) {
struct nfs4_deviceid_node *devid;
devid = &mirror->mirror_ds->id_node;
ff_layout_mark_devid_invalid(lseg, devid);
return false;
}
return true;
outerr:
pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
return false;
}
static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
u64 offset, u64 length)
{
@ -326,7 +276,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
spin_lock(&flo->generic_hdr.plh_inode->i_lock);
ff_layout_add_ds_error_locked(flo, dserr);
spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
return 0;
}
@ -353,46 +302,54 @@ ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
}
struct nfs_fh *
nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx)
nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror)
{
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
struct nfs_fh *fh = NULL;
if (!ff_layout_mirror_valid(lseg, mirror, false)) {
pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
__func__, mirror_idx);
goto out;
}
/* FIXME: For now assume there is only 1 version available for the DS */
fh = &mirror->fh_versions[0];
out:
return fh;
return &mirror->fh_versions[0];
}
int
nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
u32 mirror_idx,
nfs4_stateid *stateid)
void
nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
nfs4_stateid *stateid)
{
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
if (nfs4_ff_layout_ds_version(mirror) == 4)
nfs4_stateid_copy(stateid, &mirror->stateid);
}
if (!ff_layout_mirror_valid(lseg, mirror, false)) {
pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
__func__, mirror_idx);
goto out;
static bool
ff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo,
struct nfs4_ff_layout_mirror *mirror)
{
if (mirror == NULL)
goto outerr;
if (mirror->mirror_ds == NULL) {
struct nfs4_deviceid_node *node;
struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);
node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode),
&mirror->devid, lo->plh_lc_cred,
GFP_KERNEL);
if (node)
mirror_ds = FF_LAYOUT_MIRROR_DS(node);
/* check for race with another call to this function */
if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) &&
mirror_ds != ERR_PTR(-ENODEV))
nfs4_put_deviceid_node(node);
}
nfs4_stateid_copy(stateid, &mirror->stateid);
return 1;
out:
return 0;
if (IS_ERR(mirror->mirror_ds))
goto outerr;
return true;
outerr:
return false;
}
/**
* nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
* @lseg: the layout segment we're operating on
* @ds_idx: index of the DS to use
* @mirror: layout mirror describing the DS to use
* @fail_return: return layout on connect failure?
*
* Try to prepare a DS connection to accept an RPC call. This involves
@ -407,26 +364,18 @@ nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
* Returns a pointer to a connected DS object on success or NULL on failure.
*/
struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_mirror *mirror,
bool fail_return)
{
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
struct nfs4_pnfs_ds *ds = NULL;
struct nfs4_deviceid_node *devid;
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
int status;
if (!ff_layout_mirror_valid(lseg, mirror, true)) {
pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
__func__, ds_idx);
goto out;
}
devid = &mirror->mirror_ds->id_node;
if (ff_layout_test_devid_unavailable(devid))
goto out_fail;
if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
goto noconnect;
ds = mirror->mirror_ds->ds;
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
@ -437,8 +386,8 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
/* FIXME: For now we assume the server sent only one version of NFS
* to use for the DS.
*/
status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
dataserver_retrans,
status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node,
dataserver_timeo, dataserver_retrans,
mirror->mirror_ds->ds_versions[0].version,
mirror->mirror_ds->ds_versions[0].minor_version);
@ -453,11 +402,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
mirror->mirror_ds->ds_versions[0].wsize = max_payload;
goto out;
}
out_fail:
noconnect:
ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, lseg->pls_range.offset,
lseg->pls_range.length, NFS4ERR_NXIO,
OP_ILLEGAL, GFP_NOIO);
ff_layout_send_layouterror(lseg);
if (fail_return || !ff_layout_has_available_ds(lseg))
pnfs_error_mark_layout_for_return(ino, lseg);
ds = NULL;
@ -466,14 +416,14 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
}
const struct cred *
ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
const struct pnfs_layout_range *range,
const struct cred *mdscred)
{
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
const struct cred *cred;
if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
cred = ff_layout_get_mirror_cred(mirror, range->iomode);
if (!cred)
cred = get_cred(mdscred);
} else {
@ -483,15 +433,18 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
}
/**
* Find or create a DS rpc client with the MDS server rpc client auth flavor
* in the nfs_client cl_ds_clients list.
*/
* nfs4_ff_find_or_create_ds_client - Find or create a DS rpc client
* @mirror: pointer to the mirror
* @ds_clp: nfs_client for the DS
* @inode: pointer to inode
*
* Find or create a DS rpc client with the MDS server rpc client auth flavor
* in the nfs_client cl_ds_clients list.
*/
struct rpc_clnt *
nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx,
nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
struct nfs_client *ds_clp, struct inode *inode)
{
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
switch (mirror->mirror_ds->ds_versions[0].version) {
case 3:
/* For NFSv3 DS, flavor is set when creating DS connections */
@ -608,7 +561,7 @@ static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg)
if (IS_ERR(mirror->mirror_ds))
continue;
devid = &mirror->mirror_ds->id_node;
if (!ff_layout_test_devid_unavailable(devid))
if (!nfs4_test_deviceid_unavailable(devid))
return true;
}
}
@ -629,7 +582,7 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg)
if (!mirror->mirror_ds)
continue;
devid = &mirror->mirror_ds->id_node;
if (ff_layout_test_devid_unavailable(devid))
if (nfs4_test_deviceid_unavailable(devid))
return false;
}

View File

@ -143,6 +143,7 @@ EXPORT_SYMBOL_GPL(nfs_sync_inode);
/**
* nfs_sync_mapping - helper to flush all mmapped dirty data to disk
* @mapping: pointer to struct address_space
*/
int nfs_sync_mapping(struct address_space *mapping)
{
@ -1184,8 +1185,8 @@ int nfs_attribute_cache_expired(struct inode *inode)
/**
* nfs_revalidate_inode - Revalidate the inode attributes
* @server - pointer to nfs_server struct
* @inode - pointer to inode struct
* @server: pointer to nfs_server struct
* @inode: pointer to inode struct
*
* Updates inode attribute information by retrieving the data from the server.
*/
@ -1255,8 +1256,8 @@ int nfs_revalidate_mapping_rcu(struct inode *inode)
/**
* nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
* @mapping - pointer to mapping
* @inode: pointer to host inode
* @mapping: pointer to mapping
*/
int nfs_revalidate_mapping(struct inode *inode,
struct address_space *mapping)
@ -1371,8 +1372,8 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/**
* nfs_check_inode_attributes - verify consistency of the inode attribute cache
* @inode - pointer to inode
* @fattr - updated attributes
* @inode: pointer to inode
* @fattr: updated attributes
*
* Verifies the attribute cache. If we have just changed the attributes,
* so that fattr carries weak cache consistency data, then it may
@ -1572,8 +1573,8 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
/**
* nfs_inode_attrs_need_update - check if the inode attributes need updating
* @inode - pointer to inode
* @fattr - attributes
* @inode: pointer to inode
* @fattr: attributes
*
* Attempt to divine whether or not an RPC call reply carrying stale
* attributes got scheduled after another call carrying updated ones.
@ -1614,8 +1615,8 @@ static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr
/**
* nfs_refresh_inode - try to update the inode attribute cache
* @inode - pointer to inode
* @fattr - updated attributes
* @inode: pointer to inode
* @fattr: updated attributes
*
* Check that an RPC call that returned attributes has not overlapped with
* other recent updates of the inode metadata, then decide whether it is
@ -1649,8 +1650,8 @@ static int nfs_post_op_update_inode_locked(struct inode *inode,
/**
* nfs_post_op_update_inode - try to update the inode attribute cache
* @inode - pointer to inode
* @fattr - updated attributes
* @inode: pointer to inode
* @fattr: updated attributes
*
* After an operation that has changed the inode metadata, mark the
* attribute cache as being invalid, then try to update it.
@ -1679,8 +1680,8 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
/**
* nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache
* @inode - pointer to inode
* @fattr - updated attributes
* @inode: pointer to inode
* @fattr: updated attributes
*
* After an operation that has changed the inode metadata, mark the
* attribute cache as being invalid, then try to update it. Fake up
@ -1731,8 +1732,8 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa
/**
* nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
* @inode - pointer to inode
* @fattr - updated attributes
* @inode: pointer to inode
* @fattr: updated attributes
*
* After an operation that has changed the inode metadata, mark the
* attribute cache as being invalid, then try to update it. Fake up

View File

@ -69,7 +69,8 @@ struct nfs_clone_mount {
* Maximum number of pages that readdir can use for creating
* a vmapped array of pages.
*/
#define NFS_MAX_READDIR_PAGES 8
#define NFS_MAX_READDIR_PAGES 64
#define NFS_MAX_READDIR_RAPAGES 8
struct nfs_client_initdata {
unsigned long init_flags;
@ -755,6 +756,7 @@ static inline bool nfs_error_is_fatal(int err)
{
switch (err) {
case -ERESTARTSYS:
case -EINTR:
case -EACCES:
case -EDQUOT:
case -EFBIG:
@ -763,6 +765,7 @@ static inline bool nfs_error_is_fatal(int err)
case -EROFS:
case -ESTALE:
case -E2BIG:
case -ENOMEM:
return true;
default:
return false;

View File

@ -25,7 +25,7 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode)
/**
* nfs_start_io_read - declare the file is being used for buffered reads
* @inode - file inode
* @inode: file inode
*
* Declare that a buffered read operation is about to start, and ensure
* that we block all direct I/O.
@ -56,7 +56,7 @@ nfs_start_io_read(struct inode *inode)
/**
* nfs_end_io_read - declare that the buffered read operation is done
* @inode - file inode
* @inode: file inode
*
* Declare that a buffered read operation is done, and release the shared
* lock on inode->i_rwsem.
@ -69,7 +69,7 @@ nfs_end_io_read(struct inode *inode)
/**
* nfs_start_io_write - declare the file is being used for buffered writes
* @inode - file inode
* @inode: file inode
*
* Declare that a buffered write operation is about to start, and ensure
* that we block all direct I/O.
@ -83,7 +83,7 @@ nfs_start_io_write(struct inode *inode)
/**
* nfs_end_io_write - declare that the buffered write operation is done
* @inode - file inode
* @inode: file inode
*
* Declare that a buffered write operation is done, and release the
* lock on inode->i_rwsem.
@ -105,7 +105,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
/**
* nfs_start_io_direct - declare the file is being used for direct i/o
* @inode - file inode
* @inode: file inode
*
* Declare that a direct I/O operation is about to start, and ensure
* that we block all buffered I/O.
@ -136,7 +136,7 @@ nfs_start_io_direct(struct inode *inode)
/**
* nfs_end_io_direct - declare that the direct i/o operation is done
* @inode - file inode
* @inode: file inode
*
* Declare that a direct I/O operation is done, and release the shared
* lock on inode->i_rwsem.

View File

@ -221,10 +221,10 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
/**
* nfs_do_submount - set up mountpoint when crossing a filesystem boundary
* @dentry - parent directory
* @fh - filehandle for new root dentry
* @fattr - attributes for new root inode
* @authflavor - security flavor to use when performing the mount
* @dentry: parent directory
* @fh: filehandle for new root dentry
* @fattr: attributes for new root inode
* @authflavor: security flavor to use when performing the mount
*
*/
struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,

View File

@ -22,6 +22,7 @@
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
#include "nfstrace.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_XDR
@ -55,41 +56,15 @@
#define NFS_attrstat_sz (1+NFS_fattr_sz)
#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
#define NFS_readlinkres_sz (2)
#define NFS_readres_sz (1+NFS_fattr_sz+1)
#define NFS_readlinkres_sz (2+1)
#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
#define NFS_writeres_sz (NFS_attrstat_sz)
#define NFS_stat_sz (1)
#define NFS_readdirres_sz (1)
#define NFS_readdirres_sz (1+1)
#define NFS_statfsres_sz (1+NFS_info_sz)
static int nfs_stat_to_errno(enum nfs_stat);
/*
 * Called while encoding arguments: point the request's receive buffer at
 * @pages so reply data is received directly into them.
 *
 * The inline part of the reply is sized as RPC_REPHDRSIZE plus the auth
 * flavour's au_rslack plus @bufsize, counted in 32-bit words and converted
 * to bytes for xdr_inline_pages().
 */
static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
				 unsigned int base, unsigned int len,
				 unsigned int bufsize)
{
	unsigned int hdr_words = RPC_REPHDRSIZE +
				 req->rq_cred->cr_auth->au_rslack;

	xdr_inline_pages(&req->rq_rcv_buf, (hdr_words + bufsize) << 2,
			 pages, base, len);
}
/*
 * Handle decode buffer overflows out-of-line.
 *
 * @func: name reported in the message (the decoders visible here pass
 *        __func__)
 * @xdr:  stream that ran out of data; the message reports the distance
 *        between xdr->p and xdr->end as the remaining length
 *
 * Only emits a dprintk() debug message; it returns nothing, so the
 * caller still has to produce its own error return.
 */
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
{
dprintk("NFS: %s prematurely hit the end of our receive buffer. "
"Remaining buffer length is %tu words.\n",
func, xdr->end - xdr->p);
}
/*
* Encode/decode NFSv2 basic data types
*
@ -110,8 +85,8 @@ static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
count = be32_to_cpup(p);
recvd = xdr_read_pages(xdr, count);
if (unlikely(count > recvd))
@ -125,9 +100,6 @@ static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
"count %u > recvd %u\n", count, recvd);
count = recvd;
goto out;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -157,13 +129,16 @@ static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status)
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
*status = be32_to_cpup(p);
if (unlikely(!p))
return -EIO;
if (unlikely(*p != cpu_to_be32(NFS_OK)))
goto out_status;
*status = 0;
return 0;
out_status:
*status = be32_to_cpup(p);
trace_nfs_xdr_status((int)*status);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -205,14 +180,11 @@ static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
__be32 *p;
p = xdr_inline_decode(xdr, NFS2_FHSIZE);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
fh->size = NFS2_FHSIZE;
memcpy(fh->data, p, NFS2_FHSIZE);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -282,8 +254,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;
p = xdr_inline_decode(xdr, NFS_fattr_sz << 2);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
fattr->valid |= NFS_ATTR_FATTR_V2;
@ -325,9 +297,6 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
out_gid:
dprintk("NFS: returned invalid gid\n");
return -EINVAL;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -416,23 +385,20 @@ static int decode_filename_inline(struct xdr_stream *xdr,
u32 count;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
count = be32_to_cpup(p);
if (count > NFS3_MAXNAMLEN)
goto out_nametoolong;
p = xdr_inline_decode(xdr, count);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
*name = (const char *)p;
*length = count;
return 0;
out_nametoolong:
dprintk("NFS: returned filename too long: %u\n", count);
return -ENAMETOOLONG;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -455,8 +421,8 @@ static int decode_path(struct xdr_stream *xdr)
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
length = be32_to_cpup(p);
if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN))
goto out_size;
@ -472,9 +438,6 @@ static int decode_path(struct xdr_stream *xdr)
dprintk("NFS: server cheating in pathname result: "
"length %u > received %u\n", length, recvd);
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -615,8 +578,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
const struct nfs_readlinkargs *args = data;
encode_fhandle(xdr, args->fh);
prepare_reply_buffer(req, args->pages, args->pgbase,
args->pglen, NFS_readlinkres_sz);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->pglen, NFS_readlinkres_sz);
}
/*
@ -651,8 +614,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
const struct nfs_pgio_args *args = data;
encode_readargs(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
args->count, NFS_readres_sz);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->count, NFS_readres_sz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
}
@ -809,8 +772,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
const struct nfs_readdirargs *args = data;
encode_readdirargs(xdr, args);
prepare_reply_buffer(req, args->pages, 0,
args->count, NFS_readdirres_sz);
rpc_prepare_reply_pages(req, args->pages, 0,
args->count, NFS_readdirres_sz);
}
/*
@ -951,12 +914,12 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
int error;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EAGAIN;
if (*p++ == xdr_zero) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EAGAIN;
if (*p++ == xdr_zero)
return -EAGAIN;
entry->eof = 1;
@ -964,8 +927,8 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
}
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EAGAIN;
entry->ino = be32_to_cpup(p);
error = decode_filename_inline(xdr, &entry->name, &entry->len);
@ -978,17 +941,13 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
*/
entry->prev_cookie = entry->cookie;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EAGAIN;
entry->cookie = be32_to_cpup(p);
entry->d_type = DT_UNKNOWN;
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EAGAIN;
}
/*
@ -1052,17 +1011,14 @@ static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
__be32 *p;
p = xdr_inline_decode(xdr, NFS_info_sz << 2);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
result->tsize = be32_to_cpup(p++);
result->bsize = be32_to_cpup(p++);
result->blocks = be32_to_cpup(p++);
result->bfree = be32_to_cpup(p++);
result->bavail = be32_to_cpup(p);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,

View File

@ -222,8 +222,6 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
switch (status) {
case 0:
status = nfs_refresh_inode(inode, fattr);
set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
set_cached_acl(inode, ACL_TYPE_DEFAULT, dfacl);
break;
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:

View File

@ -21,6 +21,7 @@
#include <linux/nfs3.h>
#include <linux/nfs_fs.h>
#include <linux/nfsacl.h>
#include "nfstrace.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_XDR
@ -68,13 +69,13 @@
#define NFS3_removeres_sz (NFS3_setattrres_sz)
#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
#define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
#define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
#define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
@ -84,7 +85,7 @@
#define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
static int nfs3_stat_to_errno(enum nfs_stat);
@ -103,32 +104,6 @@ static const umode_t nfs_type2fmt[] = {
[NF3FIFO] = S_IFIFO,
};
/*
* While encoding arguments, set up the reply buffer in advance to
* receive reply data directly into the page cache.
*/
static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
unsigned int base, unsigned int len,
unsigned int bufsize)
{
struct rpc_auth *auth = req->rq_cred->cr_auth;
unsigned int replen;
replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
}
/*
* Handle decode buffer overflows out-of-line.
*/
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
{
dprintk("NFS: %s prematurely hit the end of our receive buffer. "
"Remaining buffer length is %tu words.\n",
func, xdr->end - xdr->p);
}
/*
* Encode/decode NFSv3 basic data types
*
@ -151,13 +126,10 @@ static int decode_uint32(struct xdr_stream *xdr, u32 *value)
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
*value = be32_to_cpup(p);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int decode_uint64(struct xdr_stream *xdr, u64 *value)
@ -165,13 +137,10 @@ static int decode_uint64(struct xdr_stream *xdr, u64 *value)
__be32 *p;
p = xdr_inline_decode(xdr, 8);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
xdr_decode_hyper(p, value);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -211,14 +180,14 @@ static int decode_inline_filename3(struct xdr_stream *xdr,
u32 count;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
count = be32_to_cpup(p);
if (count > NFS3_MAXNAMLEN)
goto out_nametoolong;
p = xdr_inline_decode(xdr, count);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
*name = (const char *)p;
*length = count;
return 0;
@ -226,9 +195,6 @@ static int decode_inline_filename3(struct xdr_stream *xdr,
out_nametoolong:
dprintk("NFS: returned filename too long: %u\n", count);
return -ENAMETOOLONG;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -249,8 +215,8 @@ static int decode_nfspath3(struct xdr_stream *xdr)
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
count = be32_to_cpup(p);
if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN))
goto out_nametoolong;
@ -267,9 +233,6 @@ static int decode_nfspath3(struct xdr_stream *xdr)
dprintk("NFS: server cheating in pathname result: "
"count %u > recvd %u\n", count, recvd);
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -303,13 +266,10 @@ static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier)
__be32 *p;
p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
memcpy(verifier, p, NFS3_COOKIEVERFSIZE);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -330,13 +290,10 @@ static int decode_writeverf3(struct xdr_stream *xdr, struct nfs_write_verifier *
__be32 *p;
p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
memcpy(verifier->data, p, NFS3_WRITEVERFSIZE);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -364,13 +321,16 @@ static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status)
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
*status = be32_to_cpup(p);
if (unlikely(!p))
return -EIO;
if (unlikely(*p != cpu_to_be32(NFS3_OK)))
goto out_status;
*status = 0;
return 0;
out_status:
*status = be32_to_cpup(p);
trace_nfs_xdr_status((int)*status);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -453,23 +413,20 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
length = be32_to_cpup(p++);
if (unlikely(length > NFS3_FHSIZE))
goto out_toobig;
p = xdr_inline_decode(xdr, length);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
fh->size = length;
memcpy(fh->data, p, length);
return 0;
out_toobig:
dprintk("NFS: file handle size (%u) too big\n", length);
return -E2BIG;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static void zero_nfs_fh3(struct nfs_fh *fh)
@ -655,8 +612,8 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;
p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
p = xdr_decode_ftype3(p, &fmode);
@ -690,9 +647,6 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
out_gid:
dprintk("NFS: returned invalid gid\n");
return -EINVAL;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -710,14 +664,11 @@ static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
if (*p != xdr_zero)
return decode_fattr3(xdr, fattr);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -733,8 +684,8 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;
p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
fattr->valid |= NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PRECHANGE
@ -747,9 +698,6 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -773,14 +721,11 @@ static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
if (*p != xdr_zero)
return decode_wcc_attr(xdr, fattr);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
@ -808,15 +753,12 @@ static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
{
__be32 *p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
if (*p != xdr_zero)
return decode_nfs_fh3(xdr, fh);
zero_nfs_fh3(fh);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
@ -953,8 +895,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
const struct nfs3_readlinkargs *args = data;
encode_nfs_fh3(xdr, args->fh);
prepare_reply_buffer(req, args->pages, args->pgbase,
args->pglen, NFS3_readlinkres_sz);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->pglen, NFS3_readlinkres_sz);
}
/*
@ -986,8 +928,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
encode_read3args(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
args->count, replen);
rpc_prepare_reply_pages(req, args->pages, args->pgbase,
args->count, replen);
req->rq_rcv_buf.flags |= XDRBUF_READ;
}
@ -1279,7 +1221,7 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
const struct nfs3_readdirargs *args = data;
encode_readdir3args(xdr, args);
prepare_reply_buffer(req, args->pages, 0,
rpc_prepare_reply_pages(req, args->pages, 0,
args->count, NFS3_readdirres_sz);
}
@ -1321,7 +1263,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
const struct nfs3_readdirargs *args = data;
encode_readdirplus3args(xdr, args);
prepare_reply_buffer(req, args->pages, 0,
rpc_prepare_reply_pages(req, args->pages, 0,
args->count, NFS3_readdirres_sz);
}
@ -1366,7 +1308,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
encode_nfs_fh3(xdr, args->fh);
encode_uint32(xdr, args->mask);
if (args->mask & (NFS_ACL | NFS_DFACL)) {
prepare_reply_buffer(req, args->pages, 0,
rpc_prepare_reply_pages(req, args->pages, 0,
NFSACL_MAXPAGES << PAGE_SHIFT,
ACL3_getaclres_sz);
req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
@ -1643,8 +1585,8 @@ static int decode_read3resok(struct xdr_stream *xdr,
__be32 *p;
p = xdr_inline_decode(xdr, 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
count = be32_to_cpup(p++);
eof = be32_to_cpup(p++);
ocount = be32_to_cpup(p++);
@ -1667,9 +1609,6 @@ static int decode_read3resok(struct xdr_stream *xdr,
count = recvd;
eof = 0;
goto out;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
@ -1690,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
result->op_status = status;
if (status != NFS3_OK)
goto out_status;
result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
error = decode_read3resok(xdr, result);
out:
return error;
@ -1731,22 +1670,18 @@ static int decode_write3resok(struct xdr_stream *xdr,
__be32 *p;
p = xdr_inline_decode(xdr, 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
result->count = be32_to_cpup(p++);
result->verf->committed = be32_to_cpup(p++);
if (unlikely(result->verf->committed > NFS_FILE_SYNC))
goto out_badvalue;
if (decode_writeverf3(xdr, &result->verf->verifier))
goto out_eio;
return -EIO;
return result->count;
out_badvalue:
dprintk("NFS: bad stable_how value: %u\n", result->verf->committed);
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
out_eio:
return -EIO;
}
static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
@ -2010,12 +1945,12 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
u64 new_cookie;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EAGAIN;
if (*p == xdr_zero) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EAGAIN;
if (*p == xdr_zero)
return -EAGAIN;
entry->eof = 1;
@ -2051,8 +1986,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
/* In fact, a post_op_fh3: */
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EAGAIN;
if (*p != xdr_zero) {
error = decode_nfs_fh3(xdr, entry->fh);
if (unlikely(error)) {
@ -2069,9 +2004,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EAGAIN;
out_truncated:
dprintk("NFS: directory entry contains invalid file handle\n");
*entry = old;
@ -2183,8 +2115,8 @@ static int decode_fsstat3resok(struct xdr_stream *xdr,
__be32 *p;
p = xdr_inline_decode(xdr, 8 * 6 + 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
p = xdr_decode_size3(p, &result->tbytes);
p = xdr_decode_size3(p, &result->fbytes);
p = xdr_decode_size3(p, &result->abytes);
@ -2193,9 +2125,6 @@ static int decode_fsstat3resok(struct xdr_stream *xdr,
xdr_decode_size3(p, &result->afiles);
/* ignore invarsec */
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
@ -2255,8 +2184,8 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
__be32 *p;
p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
result->rtmax = be32_to_cpup(p++);
result->rtpref = be32_to_cpup(p++);
result->rtmult = be32_to_cpup(p++);
@ -2270,9 +2199,6 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
/* ignore properties */
result->lease_time = 0;
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
@ -2328,15 +2254,12 @@ static int decode_pathconf3resok(struct xdr_stream *xdr,
__be32 *p;
p = xdr_inline_decode(xdr, 4 * 6);
if (unlikely(p == NULL))
goto out_overflow;
if (unlikely(!p))
return -EIO;
result->max_link = be32_to_cpup(p++);
result->max_namelen = be32_to_cpup(p);
/* ignore remaining fields */
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,

View File

@ -20,5 +20,8 @@ loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
struct nfs42_layoutstat_data *);
int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t);
int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
const struct nfs42_layout_error *errors,
size_t n);
#endif /* __LINUX_FS_NFS_NFS4_2_H */

View File

@ -672,6 +672,170 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
return 0;
}
/*
 * Allocate the call context for one LAYOUTERROR RPC.
 *
 * The context is zero-filled by kzalloc().  It records the inode returned
 * by nfs_igrab_and_active() (in both data->inode and data->args.inode) and
 * the segment returned by pnfs_get_lseg().  If the allocation fails, or
 * either of those helpers returns NULL, everything obtained so far is
 * undone and NULL is returned.
 */
static struct nfs42_layouterror_data *
nfs42_alloc_layouterror_data(struct pnfs_layout_segment *lseg, gfp_t gfp_flags)
{
	struct inode *inode = lseg->pls_layout->plh_inode;
	struct nfs42_layouterror_data *data;

	data = kzalloc(sizeof(*data), gfp_flags);
	if (!data)
		return NULL;

	data->inode = nfs_igrab_and_active(inode);
	data->args.inode = data->inode;
	if (!data->inode)
		goto out_free;

	data->lseg = pnfs_get_lseg(lseg);
	if (!data->lseg)
		goto out_put_inode;

	return data;

out_put_inode:
	nfs_iput_and_deactive(data->inode);
out_free:
	kfree(data);
	return NULL;
}
/*
 * Tear down a LAYOUTERROR call context: hand the layout segment to
 * pnfs_put_lseg(), the inode to nfs_iput_and_deactive(), then free the
 * context itself.  This mirrors, in reverse, what
 * nfs42_alloc_layouterror_data() acquires.
 */
static void
nfs42_free_layouterror_data(struct nfs42_layouterror_data *data)
{
pnfs_put_lseg(data->lseg);
nfs_iput_and_deactive(data->inode);
kfree(data);
}
/*
 * rpc_call_prepare callback for LAYOUTERROR.
 *
 * With inode->i_lock held, check that the layout header is still valid.
 * If it is, copy its current stateid (plh_stateid) into every queued error
 * record, drop the lock and set up the sequence for the call.  If it is
 * not, drop the lock and finish the task via rpc_exit() with status 0.
 */
static void
nfs42_layouterror_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs42_layouterror_data *data = calldata;
	struct inode *inode = data->inode;
	struct nfs_server *server = NFS_SERVER(inode);
	struct pnfs_layout_hdr *lo = data->lseg->pls_layout;
	unsigned int idx;

	spin_lock(&inode->i_lock);
	if (pnfs_layout_is_valid(lo)) {
		for (idx = 0; idx < data->args.num_errors; idx++)
			nfs4_stateid_copy(&data->args.errors[idx].stateid,
					  &lo->plh_stateid);
		spin_unlock(&inode->i_lock);

		nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
				    &data->res.seq_res, task);
		return;
	}
	spin_unlock(&inode->i_lock);

	/* The layout is no longer valid: nothing to report against. */
	rpc_exit(task, 0);
}
/*
 * rpc_call_done callback for LAYOUTERROR.
 *
 * Returns immediately if nfs4_sequence_done() returns zero.  Otherwise
 * dispatches on task->tk_status:
 *  - 0: nothing to do.
 *  - BADHANDLE / ESTALE: destroy the inode's layout.
 *  - EXPIRED / *_REVOKED / STALE_STATEID / BAD_STATEID: if the layout is
 *    still valid and its stateid matches the one sent in errors[0], mark
 *    the layout stateid invalid and free the collected segments.
 *  - OLD_STATEID: restart the call if the layout is still valid and only
 *    the "other" part of the stateid matches.
 *  - ENOTSUPP / EOPNOTSUPP: clear NFS_CAP_LAYOUTERROR on the server.
 * Any other status is ignored.
 */
static void
nfs42_layouterror_done(struct rpc_task *task, void *calldata)
{
struct nfs42_layouterror_data *data = calldata;
struct inode *inode = data->inode;
struct pnfs_layout_hdr *lo = data->lseg->pls_layout;
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
switch (task->tk_status) {
case 0:
break;
case -NFS4ERR_BADHANDLE:
case -ESTALE:
pnfs_destroy_layout(NFS_I(inode));
break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_BAD_STATEID:
/* i_lock is held while the layout header and its stateid are examined */
spin_lock(&inode->i_lock);
if (pnfs_layout_is_valid(lo) &&
nfs4_stateid_match(&data->args.errors[0].stateid,
&lo->plh_stateid)) {
LIST_HEAD(head);
/*
* Mark the bad layout state as invalid, then retry
* with the current stateid.
*/
/* NOTE(review): no resend is issued in this branch itself - confirm where the retry mentioned above happens */
pnfs_mark_layout_stateid_invalid(lo, &head);
/* the collected segments are freed only after i_lock has been dropped */
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head);
nfs_commit_inode(inode, 0);
} else
spin_unlock(&inode->i_lock);
break;
case -NFS4ERR_OLD_STATEID:
spin_lock(&inode->i_lock);
if (pnfs_layout_is_valid(lo) &&
nfs4_stateid_match_other(&data->args.errors[0].stateid,
&lo->plh_stateid)) {
/* Do we need to delay before resending? */
if (!nfs4_stateid_is_newer(&lo->plh_stateid,
&data->args.errors[0].stateid))
rpc_delay(task, HZ);
/* restart from the prepare step, which re-copies plh_stateid into the error records */
rpc_restart_call_prepare(task);
}
spin_unlock(&inode->i_lock);
break;
case -ENOTSUPP:
case -EOPNOTSUPP:
/* clear the capability bit that nfs42_proc_layouterror() tests before sending */
NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTERROR;
}
}
/*
 * rpc_release callback for LAYOUTERROR: frees the call context via
 * nfs42_free_layouterror_data().
 */
static void
nfs42_layouterror_release(void *calldata)
{
struct nfs42_layouterror_data *data = calldata;
nfs42_free_layouterror_data(data);
}
/*
 * Callback table handed to rpc_run_task() by nfs42_proc_layouterror():
 * prepare fills in the stateids, done handles the reply status, and
 * release frees the call context.
 */
static const struct rpc_call_ops nfs42_layouterror_ops = {
.rpc_call_prepare = nfs42_layouterror_prepare,
.rpc_call_done = nfs42_layouterror_done,
.rpc_release = nfs42_layouterror_release,
};
/*
 * nfs42_proc_layouterror - start a LAYOUTERROR call for a layout segment
 * @lseg: layout segment the error records relate to
 * @errors: array of error records to send
 * @n: number of entries in @errors
 *
 * Copies the @n records into a freshly allocated call context and starts
 * an RPC task flagged RPC_TASK_ASYNC; the task reference returned by
 * rpc_run_task() is dropped straight away.
 *
 * Returns 0 once the task has been started, -EOPNOTSUPP if the server does
 * not have NFS_CAP_LAYOUTERROR set, -EINVAL if @n exceeds
 * NFS42_LAYOUTERROR_MAX, -ENOMEM if the call context cannot be allocated,
 * or the error encoded in the pointer returned by rpc_run_task().
 *
 * NOTE(review): when rpc_run_task() fails the context is not freed here;
 * presumably the RPC layer invokes ->rpc_release in that case - confirm.
 */
int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
		const struct nfs42_layout_error *errors, size_t n)
{
	struct inode *inode = lseg->pls_layout->plh_inode;
	struct nfs42_layouterror_data *data;
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTERROR],
	};
	struct rpc_task_setup task_setup = {
		.rpc_message = &msg,
		.callback_ops = &nfs42_layouterror_ops,
		.flags = RPC_TASK_ASYNC,
	};
	unsigned int idx;

	if (!nfs_server_capable(inode, NFS_CAP_LAYOUTERROR))
		return -EOPNOTSUPP;
	if (n > NFS42_LAYOUTERROR_MAX)
		return -EINVAL;

	data = nfs42_alloc_layouterror_data(lseg, GFP_NOFS);
	if (!data)
		return -ENOMEM;

	/* Queue the records; argument and result counts advance together. */
	for (idx = 0; idx < n; idx++)
		data->args.errors[idx] = errors[idx];
	data->args.num_errors += n;
	data->res.num_errors += n;

	msg.rpc_argp = &data->args;
	msg.rpc_resp = &data->res;
	task_setup.callback_data = data;
	task_setup.rpc_client = NFS_SERVER(inode)->client;
	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0);

	task = rpc_run_task(&task_setup);
	if (!IS_ERR(task)) {
		rpc_put_task(task);
		return 0;
	}
	return PTR_ERR(task);
}
EXPORT_SYMBOL_GPL(nfs42_proc_layouterror);
static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
struct file *dst_f, struct nfs_lock_context *src_lock,
struct nfs_lock_context *dst_lock, loff_t src_offset,

View File

@ -51,6 +51,15 @@
1 /* opaque devaddr4 length */ + \
XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
#define decode_layoutstats_maxsz (op_decode_hdr_maxsz)
#define encode_device_error_maxsz (XDR_QUADLEN(NFS4_DEVICEID4_SIZE) + \
1 /* status */ + 1 /* opnum */)
#define encode_layouterror_maxsz (op_decode_hdr_maxsz + \
2 /* offset */ + \
2 /* length */ + \
encode_stateid_maxsz + \
1 /* Array size */ + \
encode_device_error_maxsz)
#define decode_layouterror_maxsz (op_decode_hdr_maxsz)
#define encode_clone_maxsz (encode_stateid_maxsz + \
encode_stateid_maxsz + \
2 /* src offset */ + \
@ -59,43 +68,53 @@
#define decode_clone_maxsz (op_decode_hdr_maxsz)
#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_allocate_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_allocate_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_copy_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_savefh_maxsz + \
encode_putfh_maxsz + \
encode_copy_maxsz + \
encode_commit_maxsz)
#define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_savefh_maxsz + \
decode_putfh_maxsz + \
decode_copy_maxsz + \
decode_commit_maxsz)
#define NFS4_enc_offload_cancel_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_offload_cancel_maxsz)
#define NFS4_dec_offload_cancel_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_offload_cancel_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_deallocate_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_deallocate_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_seek_maxsz)
#define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_seek_maxsz)
#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \
@ -106,6 +125,16 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
#define NFS4_enc_layouterror_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
NFS42_LAYOUTERROR_MAX * \
encode_layouterror_maxsz)
#define NFS4_dec_layouterror_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
NFS42_LAYOUTERROR_MAX * \
decode_layouterror_maxsz)
#define NFS4_enc_clone_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@ -223,6 +252,34 @@ static void encode_clone(struct xdr_stream *xdr,
xdr_encode_hyper(p, args->count);
}
static void encode_device_error(struct xdr_stream *xdr,
const struct nfs42_device_error *error)
{
__be32 *p;
p = reserve_space(xdr, NFS4_DEVICEID4_SIZE + 2*4);
p = xdr_encode_opaque_fixed(p, error->dev_id.data,
NFS4_DEVICEID4_SIZE);
*p++ = cpu_to_be32(error->status);
*p = cpu_to_be32(error->opnum);
}
/*
 * Encode a single LAYOUTERROR operation: op header, 64-bit offset and
 * length, the layout stateid, then a device error array that always
 * carries exactly one element (args->errors[0]).
 */
static void encode_layouterror(struct xdr_stream *xdr,
			       const struct nfs42_layout_error *args,
			       struct compound_hdr *hdr)
{
	__be32 *range;
	__be32 *array_len;

	encode_op_hdr(xdr, OP_LAYOUTERROR, decode_layouterror_maxsz, hdr);

	range = reserve_space(xdr, 8 + 8);
	range = xdr_encode_hyper(range, args->offset);
	xdr_encode_hyper(range, args->length);

	encode_nfs4_stateid(xdr, &args->stateid);

	/* Array size: only the first device error is ever sent. */
	array_len = reserve_space(xdr, 4);
	*array_len = cpu_to_be32(1);
	encode_device_error(xdr, &args->errors[0]);
}
/*
* Encode ALLOCATE request
*/
@ -381,6 +438,27 @@ static void nfs4_xdr_enc_clone(struct rpc_rqst *req,
encode_nops(&hdr);
}
/*
 * Encode LAYOUTERROR request
 *
 * The compound is SEQUENCE, PUTFH on the inode's file handle, then one
 * LAYOUTERROR operation for each of the args->num_errors queued records.
 */
static void nfs4_xdr_enc_layouterror(struct rpc_rqst *req,
				     struct xdr_stream *xdr,
				     const void *data)
{
	const struct nfs42_layouterror_args *args = data;
	struct compound_hdr hdr = {
		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
	};
	int idx = 0;

	encode_compound_hdr(xdr, req, &hdr);
	encode_sequence(xdr, &args->seq_args, &hdr);
	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
	while (idx < args->num_errors) {
		encode_layouterror(xdr, &args->errors[idx], &hdr);
		idx++;
	}
	encode_nops(&hdr);
}
static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_ALLOCATE);
@ -394,7 +472,7 @@ static int decode_write_response(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
return -EIO;
count = be32_to_cpup(p);
if (count > 1)
return -EREMOTEIO;
@ -402,18 +480,14 @@ static int decode_write_response(struct xdr_stream *xdr,
status = decode_opaque_fixed(xdr, &res->stateid,
NFS4_STATEID_SIZE);
if (unlikely(status))
goto out_overflow;
return -EIO;
}
p = xdr_inline_decode(xdr, 8 + 4);
if (unlikely(!p))
goto out_overflow;
return -EIO;
p = xdr_decode_hyper(p, &res->count);
res->verifier.committed = be32_to_cpup(p);
return decode_verifier(xdr, &res->verifier.verifier);
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int decode_copy_requirements(struct xdr_stream *xdr,
@ -422,14 +496,11 @@ static int decode_copy_requirements(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 4 + 4);
if (unlikely(!p))
goto out_overflow;
return -EIO;
res->consecutive = be32_to_cpup(p++);
res->synchronous = be32_to_cpup(p++);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res)
@ -474,15 +545,11 @@ static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
p = xdr_inline_decode(xdr, 4 + 8);
if (unlikely(!p))
goto out_overflow;
return -EIO;
res->sr_eof = be32_to_cpup(p++);
p = xdr_decode_hyper(p, &res->sr_offset);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
static int decode_layoutstats(struct xdr_stream *xdr)
@ -495,6 +562,11 @@ static int decode_clone(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_CLONE);
}
/*
 * Decode one LAYOUTERROR result: only the operation header is decoded,
 * and the value from decode_op_hdr() is returned unchanged.
 */
static int decode_layouterror(struct xdr_stream *xdr)
{
return decode_op_hdr(xdr, OP_LAYOUTERROR);
}
/*
* Decode ALLOCATE request
*/
@ -704,4 +776,30 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
return status;
}
/*
 * Decode LAYOUTERROR reply
 *
 * Decodes the compound header, SEQUENCE and PUTFH, then one LAYOUTERROR
 * result for each of res->num_errors, stopping at the first non-zero
 * status.  The final status is stored in res->rpc_status and returned.
 */
static int nfs4_xdr_dec_layouterror(struct rpc_rqst *rqstp,
				    struct xdr_stream *xdr,
				    void *data)
{
	struct nfs42_layouterror_res *res = data;
	struct compound_hdr hdr;
	int status, idx;

	status = decode_compound_hdr(xdr, &hdr);
	if (!status)
		status = decode_sequence(xdr, &res->seq_res, rqstp);
	if (!status) {
		status = decode_putfh(xdr);
		for (idx = 0; status == 0 && idx < res->num_errors; idx++)
			status = decode_layouterror(xdr);
	}

	res->rpc_status = status;
	return status;
}
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */

View File

@ -42,7 +42,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
}
#ifdef CONFIG_NFS_V4_1
/**
/*
* Per auth flavor data server rpc clients
*/
struct nfs4_ds_server {
@ -51,7 +51,9 @@ struct nfs4_ds_server {
};
/**
* Common lookup case for DS I/O
* nfs4_find_ds_client - Common lookup case for DS I/O
* @ds_clp: pointer to the DS's nfs_client
* @flavor: rpc auth flavour to match
*/
static struct nfs4_ds_server *
nfs4_find_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor)
@ -118,9 +120,13 @@ nfs4_free_ds_server(struct nfs4_ds_server *dss)
}
/**
* Find or create a DS rpc client with the MDS server rpc client auth flavor
* in the nfs_client cl_ds_clients list.
*/
* nfs4_find_or_create_ds_client - Find or create a DS rpc client
* @ds_clp: pointer to the DS's nfs_client
* @inode: pointer to the inode
*
* Find or create a DS rpc client with the MDS server rpc client auth flavor
* in the nfs_client cl_ds_clients list.
*/
struct rpc_clnt *
nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode)
{
@ -145,7 +151,6 @@ static void
nfs4_shutdown_ds_clients(struct nfs_client *clp)
{
struct nfs4_ds_server *dss;
LIST_HEAD(shutdown_list);
while (!list_empty(&clp->cl_ds_clients)) {
dss = list_entry(clp->cl_ds_clients.next,
@ -284,7 +289,7 @@ static int nfs4_init_callback(struct nfs_client *clp)
/**
* nfs40_init_client - nfs_client initialization tasks for NFSv4.0
* @clp - nfs_client to initialize
* @clp: nfs_client to initialize
*
* Returns zero on success, or a negative errno if some error occurred.
*/
@ -312,7 +317,7 @@ int nfs40_init_client(struct nfs_client *clp)
/**
* nfs41_init_client - nfs_client initialization tasks for NFSv4.1+
* @clp - nfs_client to initialize
* @clp: nfs_client to initialize
*
* Returns zero on success, or a negative errno if some error occurred.
*/
@ -360,9 +365,7 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
* nfs4_init_client - Initialise an NFS4 client record
*
* @clp: nfs_client to initialise
* @timeparms: timeout parameters for underlying RPC transport
* @ip_addr: callback IP address in presentation format
* @authflavor: authentication flavor for underlying RPC transport
* @cl_init: pointer to nfs_client_initdata
*
* Returns pointer to an NFS client, or an ERR_PTR value.
*/
@ -649,13 +652,13 @@ nfs4_check_server_scope(struct nfs41_server_scope *s1,
/**
* nfs4_detect_session_trunking - Checks for session trunking.
*
* Called after a successful EXCHANGE_ID on a multi-addr connection.
* Upon success, add the transport.
*
* @clp: original mount nfs_client
* @res: result structure from an exchange_id using the original mount
* nfs_client with a new multi_addr transport
* @xprt: pointer to the transport to add.
*
* Called after a successful EXCHANGE_ID on a multi-addr connection.
* Upon success, add the transport.
*
* Returns zero on success, otherwise -EINVAL
*

View File

@ -137,6 +137,7 @@ static size_t nfs_parse_server_name(char *string, size_t len,
/**
* nfs_find_best_sec - Find a security mechanism supported locally
* @clnt: pointer to rpc_clnt
* @server: NFS server struct
* @flavors: List of security tuples returned by SECINFO procedure
*
@ -288,8 +289,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
/**
* nfs_follow_referral - set up mountpoint when hitting a referral on moved error
* @dentry - parent directory
* @locations - array of NFSv4 server location information
* @dentry: parent directory
* @locations: array of NFSv4 server location information
*
*/
static struct vfsmount *nfs_follow_referral(struct dentry *dentry,

View File

@ -730,33 +730,41 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
res->sr_slot = NULL;
}
/*
 * Record @seqnr as the highest sequence number sent on @slot, but only
 * when it is ahead of the value already stored. The distance is taken as
 * a signed 32-bit difference of the two u32 values, so the test keeps
 * working when the sequence counter wraps around.
 */
static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot,
		u32 seqnr)
{
	s32 ahead = (s32)(seqnr - slot->seq_nr_highest_sent);

	/* Not newer than what is already recorded: leave the slot alone. */
	if (ahead <= 0)
		return;
	slot->seq_nr_highest_sent = seqnr;
}
/*
 * Mark @seqnr as acknowledged on @slot. Both the last-acked and the
 * highest-sent counters are set to @seqnr unconditionally, so any higher
 * "sent" value recorded earlier is overwritten.
 */
static void nfs4_slot_sequence_acked(struct nfs4_slot *slot,
		u32 seqnr)
{
	slot->seq_nr_last_acked = seqnr;
	slot->seq_nr_highest_sent = seqnr;
}
static int nfs41_sequence_process(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
struct nfs4_session *session;
struct nfs4_slot *slot = res->sr_slot;
struct nfs_client *clp;
bool interrupted = false;
int ret = 1;
if (slot == NULL)
goto out_noaction;
/* don't increment the sequence number if the task wasn't sent */
if (!RPC_WAS_SENT(task))
if (!RPC_WAS_SENT(task) || slot->seq_done)
goto out;
session = slot->table->session;
if (slot->interrupted) {
if (res->sr_status != -NFS4ERR_DELAY)
slot->interrupted = 0;
interrupted = true;
}
trace_nfs4_sequence_done(session, res);
/* Check the SEQUENCE operation status */
switch (res->sr_status) {
case 0:
/* Mark this sequence number as having been acked */
nfs4_slot_sequence_acked(slot, slot->seq_nr);
/* Update the slot's sequence and clientid lease timer */
slot->seq_done = 1;
clp = session->clp;
@ -771,9 +779,9 @@ static int nfs41_sequence_process(struct rpc_task *task,
* sr_status remains 1 if an RPC level error occurred.
* The server may or may not have processed the sequence
* operation.
* Mark the slot as having hosted an interrupted RPC call.
*/
slot->interrupted = 1;
nfs4_slot_sequence_record_sent(slot, slot->seq_nr);
slot->seq_done = 1;
goto out;
case -NFS4ERR_DELAY:
/* The server detected a resend of the RPC call and
@ -784,6 +792,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
__func__,
slot->slot_nr,
slot->seq_nr);
nfs4_slot_sequence_acked(slot, slot->seq_nr);
goto out_retry;
case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_SEQ_FALSE_RETRY:
@ -791,6 +800,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
* The server thinks we tried to replay a request.
* Retry the call after bumping the sequence ID.
*/
nfs4_slot_sequence_acked(slot, slot->seq_nr);
goto retry_new_seq;
case -NFS4ERR_BADSLOT:
/*
@ -801,21 +811,28 @@ static int nfs41_sequence_process(struct rpc_task *task,
goto session_recover;
goto retry_nowait;
case -NFS4ERR_SEQ_MISORDERED:
nfs4_slot_sequence_record_sent(slot, slot->seq_nr);
/*
* Was the last operation on this sequence interrupted?
* If so, retry after bumping the sequence number.
* Were one or more calls using this slot interrupted?
* If the server never received the request, then our
* transmitted slot sequence number may be too high.
*/
if (interrupted)
goto retry_new_seq;
/*
* Could this slot have been previously retired?
* If so, then the server may be expecting seq_nr = 1!
*/
if (slot->seq_nr != 1) {
slot->seq_nr = 1;
if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) {
slot->seq_nr--;
goto retry_nowait;
}
goto session_recover;
/*
* RFC5661:
* A retry might be sent while the original request is
* still in progress on the replier. The replier SHOULD
* deal with the issue by returning NFS4ERR_DELAY as the
* reply to SEQUENCE or CB_SEQUENCE operation, but
* implementations MAY return NFS4ERR_SEQ_MISORDERED.
*
* Restart the search after a delay.
*/
slot->seq_nr = slot->seq_nr_highest_sent;
goto out_retry;
default:
/* Just update the slot sequence no. */
slot->seq_done = 1;
@ -906,17 +923,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
.rpc_call_done = nfs41_call_sync_done,
};
static void
nfs4_sequence_process_interrupted(struct nfs_client *client,
struct nfs4_slot *slot, const struct cred *cred)
{
struct rpc_task *task;
task = _nfs41_proc_sequence(client, cred, slot, true);
if (!IS_ERR(task))
rpc_put_task_async(task);
}
#else /* !CONFIG_NFS_V4_1 */
static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
@ -937,16 +943,15 @@ int nfs4_sequence_done(struct rpc_task *task,
}
EXPORT_SYMBOL_GPL(nfs4_sequence_done);
static void
nfs4_sequence_process_interrupted(struct nfs_client *client,
struct nfs4_slot *slot, const struct cred *cred)
{
WARN_ON_ONCE(1);
slot->interrupted = 0;
}
#endif /* !CONFIG_NFS_V4_1 */
/*
 * Put the per-call SEQUENCE result fields of @res back into their initial
 * state: status 1, no status flags, and a timestamp of the current jiffies.
 * The comment in nfs41_sequence_process() notes that sr_status remains 1
 * if an RPC level error occurred, i.e. 1 acts as the "nothing filled in"
 * value.
 */
static void nfs41_sequence_res_init(struct nfs4_sequence_res *res)
{
	res->sr_status = 1;
	res->sr_status_flags = 0;
	res->sr_timestamp = jiffies;
}
static
void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
@ -958,10 +963,6 @@ void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
args->sa_slot = slot;
res->sr_slot = slot;
res->sr_timestamp = jiffies;
res->sr_status_flags = 0;
res->sr_status = 1;
}
int nfs4_setup_sequence(struct nfs_client *client,
@ -982,31 +983,25 @@ int nfs4_setup_sequence(struct nfs_client *client,
task->tk_timeout = 0;
}
for (;;) {
spin_lock(&tbl->slot_tbl_lock);
/* The state manager will wait until the slot table is empty */
if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
goto out_sleep;
spin_lock(&tbl->slot_tbl_lock);
/* The state manager will wait until the slot table is empty */
if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
goto out_sleep;
slot = nfs4_alloc_slot(tbl);
if (IS_ERR(slot)) {
/* Try again in 1/4 second */
if (slot == ERR_PTR(-ENOMEM))
task->tk_timeout = HZ >> 2;
goto out_sleep;
}
spin_unlock(&tbl->slot_tbl_lock);
if (likely(!slot->interrupted))
break;
nfs4_sequence_process_interrupted(client,
slot, task->tk_msg.rpc_cred);
slot = nfs4_alloc_slot(tbl);
if (IS_ERR(slot)) {
/* Try again in 1/4 second */
if (slot == ERR_PTR(-ENOMEM))
task->tk_timeout = HZ >> 2;
goto out_sleep;
}
spin_unlock(&tbl->slot_tbl_lock);
nfs4_sequence_attach_slot(args, res, slot);
trace_nfs4_setup_sequence(session, args);
out_start:
nfs41_sequence_res_init(res);
rpc_call_start(task);
return 0;
@ -1555,6 +1550,10 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
static void nfs_set_open_stateid_locked(struct nfs4_state *state,
const nfs4_stateid *stateid, nfs4_stateid *freeme)
__must_hold(&state->owner->so_lock)
__must_hold(&state->seqlock)
__must_hold(RCU)
{
DEFINE_WAIT(wait);
int status = 0;
@ -5963,7 +5962,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
/**
* nfs4_proc_setclientid_confirm - Confirm client ID
* @clp: state data structure
* @res: result of a previous SETCLIENTID
* @arg: result of a previous SETCLIENTID
* @cred: credential to use for this call
*
* Returns zero, a negative errno, or a negative NFS4ERR status code.
@ -7527,7 +7526,7 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
return status;
}
/**
/*
* If 'use_integrity' is true and the state management nfs_client
* cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient
* and the machine credential as per RFC3530bis and RFC5661 Security
@ -8937,10 +8936,12 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
if (status != 0)
goto out;
/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
if (task->tk_status < 0 || lgp->res.layoutp->len == 0) {
if (task->tk_status < 0) {
status = nfs4_layoutget_handle_exception(task, lgp, &exception);
*timeout = exception.timeout;
} else if (lgp->res.layoutp->len == 0) {
status = -EAGAIN;
*timeout = nfs4_update_delay(&exception.timeout);
} else
lseg = pnfs_layout_process(lgp);
out:
@ -9219,7 +9220,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
return status;
}
/**
/*
* Use the state management nfs_client cl_rpcclient, which uses krb5i (if
* possible) as per RFC3530bis and RFC5661 Security Considerations sections
*/
@ -9484,7 +9485,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = {
* @server: server / transport on which to perform the operation
* @stateid: state ID to release
* @cred: credential
* @is_recovery: set to true if this call needs to be privileged
* @privileged: set to true if this call needs to be privileged
*
* Note: this function is always asynchronous.
*/
@ -9691,7 +9692,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
| NFS_CAP_CLONE,
| NFS_CAP_CLONE
| NFS_CAP_LAYOUTERROR,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,

View File

@ -55,7 +55,7 @@ static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize)
/**
* nfs4_slot_tbl_drain_complete - wake waiters when drain is complete
* @tbl - controlling slot table
* @tbl: controlling slot table
*
*/
void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl)
@ -110,6 +110,8 @@ static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl,
slot->table = tbl;
slot->slot_nr = slotid;
slot->seq_nr = seq_init;
slot->seq_nr_highest_sent = seq_init;
slot->seq_nr_last_acked = seq_init - 1;
}
return slot;
}
@ -276,7 +278,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl,
p = &tbl->slots;
while (*p) {
(*p)->seq_nr = ivalue;
(*p)->interrupted = 0;
(*p)->seq_nr_highest_sent = ivalue;
(*p)->seq_nr_last_acked = ivalue - 1;
p = &(*p)->next;
}
tbl->highest_used_slotid = NFS4_NO_SLOT;

View File

@ -10,7 +10,7 @@
/* maximum number of slots to use */
#define NFS4_DEF_SLOT_TABLE_SIZE (64U)
#define NFS4_DEF_CB_SLOT_TABLE_SIZE (1U)
#define NFS4_DEF_CB_SLOT_TABLE_SIZE (16U)
#define NFS4_MAX_SLOT_TABLE (1024U)
#define NFS4_NO_SLOT ((u32)-1)
@ -23,8 +23,9 @@ struct nfs4_slot {
unsigned long generation;
u32 slot_nr;
u32 seq_nr;
unsigned int interrupted : 1,
privileged : 1,
u32 seq_nr_last_acked;
u32 seq_nr_highest_sent;
unsigned int privileged : 1,
seq_done : 1;
};

View File

@ -563,6 +563,7 @@ static void nfs4_gc_state_owners(struct nfs_server *server)
* nfs4_get_state_owner - Look up a state owner given a credential
* @server: nfs_server to search
* @cred: RPC credential to match
* @gfp_flags: allocation mode
*
* Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
*/

View File

@ -524,6 +524,31 @@ TRACE_EVENT(nfs4_setup_sequence,
)
);
/*
 * nfs4_xdr_status - trace event carrying an operation number and a status.
 *
 * @op:    operation number, stored unchanged
 * @error: status value; it is stored negated (see TP_fast_assign below)
 *
 * The stored status is printed both numerically and through
 * show_nfsv4_errors().
 * NOTE(review): the negation suggests callers pass a negative value so
 * that the recorded code is positive - confirm against the call sites.
 */
TRACE_EVENT(nfs4_xdr_status,
TP_PROTO(
u32 op,
int error
),
TP_ARGS(op, error),
TP_STRUCT__entry(
__field(u32, op)
__field(int, error)
),
TP_fast_assign(
__entry->op = op;
/* Sign is flipped here; the printk below uses the flipped value. */
__entry->error = -error;
),
TP_printk(
"operation %d: nfs status %d (%s)",
__entry->op,
__entry->error, show_nfsv4_errors(__entry->error)
)
);
DECLARE_EVENT_CLASS(nfs4_open_event,
TP_PROTO(
const struct nfs_open_context *ctx,

File diff suppressed because it is too large Load Diff

View File

@ -11,3 +11,4 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_status);

View File

@ -969,6 +969,91 @@ TRACE_EVENT(nfs_commit_done,
)
);
TRACE_DEFINE_ENUM(NFS_OK);
TRACE_DEFINE_ENUM(NFSERR_PERM);
TRACE_DEFINE_ENUM(NFSERR_NOENT);
TRACE_DEFINE_ENUM(NFSERR_IO);
TRACE_DEFINE_ENUM(NFSERR_NXIO);
TRACE_DEFINE_ENUM(NFSERR_ACCES);
TRACE_DEFINE_ENUM(NFSERR_EXIST);
TRACE_DEFINE_ENUM(NFSERR_XDEV);
TRACE_DEFINE_ENUM(NFSERR_NODEV);
TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
TRACE_DEFINE_ENUM(NFSERR_ISDIR);
TRACE_DEFINE_ENUM(NFSERR_INVAL);
TRACE_DEFINE_ENUM(NFSERR_FBIG);
TRACE_DEFINE_ENUM(NFSERR_NOSPC);
TRACE_DEFINE_ENUM(NFSERR_ROFS);
TRACE_DEFINE_ENUM(NFSERR_MLINK);
TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
TRACE_DEFINE_ENUM(NFSERR_DQUOT);
TRACE_DEFINE_ENUM(NFSERR_STALE);
TRACE_DEFINE_ENUM(NFSERR_REMOTE);
TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
/*
 * nfs_show_status - turn an NFS status value into a short label for
 * trace output, using __print_symbolic() with the table below.
 *
 * Most labels are the enum name without the NFSERR_ prefix. Exceptions
 * in the table: NFSERR_NOT_SYNC -> "NOTSYNC", NFSERR_BAD_COOKIE ->
 * "BADCOOKIE", and NFSERR_SERVERFAULT -> "REMOTEIO".
 * NOTE(review): "REMOTEIO" presumably mirrors the errno this status is
 * mapped to elsewhere - confirm before changing the label.
 */
#define nfs_show_status(x) \
__print_symbolic(x, \
{ NFS_OK, "OK" }, \
{ NFSERR_PERM, "PERM" }, \
{ NFSERR_NOENT, "NOENT" }, \
{ NFSERR_IO, "IO" }, \
{ NFSERR_NXIO, "NXIO" }, \
{ NFSERR_ACCES, "ACCES" }, \
{ NFSERR_EXIST, "EXIST" }, \
{ NFSERR_XDEV, "XDEV" }, \
{ NFSERR_NODEV, "NODEV" }, \
{ NFSERR_NOTDIR, "NOTDIR" }, \
{ NFSERR_ISDIR, "ISDIR" }, \
{ NFSERR_INVAL, "INVAL" }, \
{ NFSERR_FBIG, "FBIG" }, \
{ NFSERR_NOSPC, "NOSPC" }, \
{ NFSERR_ROFS, "ROFS" }, \
{ NFSERR_MLINK, "MLINK" }, \
{ NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
{ NFSERR_NOTEMPTY, "NOTEMPTY" }, \
{ NFSERR_DQUOT, "DQUOT" }, \
{ NFSERR_STALE, "STALE" }, \
{ NFSERR_REMOTE, "REMOTE" }, \
{ NFSERR_WFLUSH, "WFLUSH" }, \
{ NFSERR_BADHANDLE, "BADHANDLE" }, \
{ NFSERR_NOT_SYNC, "NOTSYNC" }, \
{ NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
{ NFSERR_NOTSUPP, "NOTSUPP" }, \
{ NFSERR_TOOSMALL, "TOOSMALL" }, \
{ NFSERR_SERVERFAULT, "REMOTEIO" }, \
{ NFSERR_BADTYPE, "BADTYPE" }, \
{ NFSERR_JUKEBOX, "JUKEBOX" })
/*
 * nfs_xdr_status - trace event carrying a single status value.
 *
 * @error: status value; stored exactly as passed in (no sign change)
 *
 * The value is printed numerically and as a label produced by
 * nfs_show_status().
 */
TRACE_EVENT(nfs_xdr_status,
TP_PROTO(
int error
),
TP_ARGS(error),
TP_STRUCT__entry(
__field(int, error)
),
TP_fast_assign(
__entry->error = error;
),
TP_printk(
"error=%d (%s)",
__entry->error, nfs_show_status(__entry->error)
)
);
#endif /* _TRACE_NFS_H */
#undef TRACE_INCLUDE_PATH

View File

@ -350,7 +350,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
/**
* nfs_unlock_request - Unlock request and wake up sleepers.
* @req:
* @req: pointer to request
*/
void nfs_unlock_request(struct nfs_page *req)
{
@ -368,7 +368,7 @@ void nfs_unlock_request(struct nfs_page *req)
/**
* nfs_unlock_and_release_request - Unlock request and release the nfs_page
* @req:
* @req: pointer to request
*/
void nfs_unlock_and_release_request(struct nfs_page *req)
{
@ -531,7 +531,6 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
* nfs_pgio_rpcsetup - Set up arguments for a pageio call
* @hdr: The pageio hdr
* @count: Number of bytes to read
* @offset: Initial offset
* @how: How to commit data (writes only)
* @cinfo: Commit information for the call (writes only)
*/
@ -634,7 +633,6 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
/**
* nfs_pgio_error - Clean up from a pageio error
* @desc: IO descriptor
* @hdr: pageio header
*/
static void nfs_pgio_error(struct nfs_pgio_header *hdr)
@ -768,8 +766,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
pageused = 0;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_list_add_request(req, &hdr->pages);
nfs_list_move_request(req, &hdr->pages);
if (!last_page || last_page != req->wb_page) {
pageused++;
@ -893,6 +890,7 @@ static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
* nfs_can_coalesce_requests - test two requests for compatibility
* @prev: pointer to nfs_page
* @req: pointer to nfs_page
* @pgio: pointer to nfs_pageio_descriptor
*
* The nfs_page structures 'prev' and 'req' are compared to ensure that the
* page data area they describe is contiguous, and that their RPC
@ -961,8 +959,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
}
if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
nfs_list_remove_request(req);
nfs_list_add_request(req, &mirror->pg_list);
nfs_list_move_request(req, &mirror->pg_list);
mirror->pg_count += req->wb_bytes;
return 1;
}
@ -988,6 +985,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
}
}
static void
nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
LIST_HEAD(head);
nfs_list_move_request(req, &head);
desc->pg_completion_ops->error_cleanup(&head, desc->pg_error);
}
/**
* nfs_pageio_add_request - Attempt to coalesce a request into a page list.
* @desc: destination io descriptor
@ -1025,10 +1032,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
nfs_page_group_unlock(req);
desc->pg_moreio = 1;
nfs_pageio_doio(desc);
if (desc->pg_error < 0)
return 0;
if (mirror->pg_recoalesce)
return 0;
if (desc->pg_error < 0 || mirror->pg_recoalesce)
goto out_cleanup_subreq;
/* retry add_request for this subreq */
nfs_page_group_lock(req);
continue;
@ -1061,6 +1066,10 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
desc->pg_error = PTR_ERR(subreq);
nfs_page_group_unlock(req);
return 0;
out_cleanup_subreq:
if (req != subreq)
nfs_pageio_cleanup_request(desc, subreq);
return 0;
}
static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@ -1079,7 +1088,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
struct nfs_page *req;
req = list_first_entry(&head, struct nfs_page, wb_list);
nfs_list_remove_request(req);
if (__nfs_pageio_add_request(desc, req))
continue;
if (desc->pg_error < 0) {
@ -1120,7 +1128,8 @@ static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc)
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
mirror = &desc->pg_mirrors[midx];
desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
desc->pg_completion_ops->error_cleanup(&mirror->pg_list,
desc->pg_error);
}
}
@ -1168,11 +1177,14 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (nfs_pgio_has_mirroring(desc))
desc->pg_mirror_idx = midx;
if (!nfs_pageio_add_request_mirror(desc, dupreq))
goto out_failed;
goto out_cleanup_subreq;
}
return 1;
out_cleanup_subreq:
if (req != dupreq)
nfs_pageio_cleanup_request(desc, dupreq);
out_failed:
nfs_pageio_error_cleanup(desc);
return 0;
@ -1194,7 +1206,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
desc->pg_mirror_idx = mirror_idx;
for (;;) {
nfs_pageio_doio(desc);
if (!mirror->pg_recoalesce)
if (desc->pg_error < 0 || !mirror->pg_recoalesce)
break;
if (!nfs_do_recoalesce(desc))
break;
@ -1222,9 +1234,8 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
if (!nfs_pageio_add_request(desc, req))
nfs_list_add_request(req, &failed);
nfs_list_move_request(req, &failed);
}
nfs_pageio_complete(desc);
if (!list_empty(&failed)) {

View File

@ -758,22 +758,35 @@ static int
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
struct nfs_server *server,
struct list_head *layout_list)
__must_hold(&clp->cl_lock)
__must_hold(RCU)
{
struct pnfs_layout_hdr *lo, *next;
struct inode *inode;
list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) ||
!list_empty(&lo->plh_bulk_destroy))
continue;
/* If the sb is being destroyed, just bail */
if (!nfs_sb_active(server->super))
break;
inode = igrab(lo->plh_inode);
if (inode == NULL)
continue;
list_del_init(&lo->plh_layouts);
if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
continue;
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
iput(inode);
if (inode != NULL) {
list_del_init(&lo->plh_layouts);
if (pnfs_layout_add_bulk_destroy_list(inode,
layout_list))
continue;
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
iput(inode);
} else {
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
}
nfs_sb_deactive(server->super);
spin_lock(&clp->cl_lock);
rcu_read_lock();
return -EAGAIN;
@ -811,7 +824,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
/* Free all lsegs that are attached to commit buckets */
nfs_commit_inode(inode, 0);
pnfs_put_layout_hdr(lo);
iput(inode);
nfs_iput_and_deactive(inode);
}
return ret;
}

View File

@ -104,6 +104,7 @@ enum {
NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */
NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */
NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */
};
enum layoutdriver_policy_flags {
@ -349,6 +350,7 @@ void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nf
void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *,
const struct nfs4_deviceid *);
bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
void nfs4_mark_deviceid_available(struct nfs4_deviceid_node *node);
void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
void nfs4_deviceid_purge_client(const struct nfs_client *);

View File

@ -283,11 +283,23 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
}
EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node);
/*
 * Clear the NFS_DEVICEID_UNAVAILABLE flag on @node if it is currently
 * set. When the flag is already clear nothing is written and no barrier
 * is issued; otherwise the clear is followed by smp_mb__after_atomic().
 */
void
nfs4_mark_deviceid_available(struct nfs4_deviceid_node *node)
{
	if (!test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags))
		return;

	clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
	smp_mb__after_atomic();
}
EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_available);
/*
 * Flag @node as unavailable and remember when that happened.
 *
 * The timestamp is written first and the flag is set afterwards, with a
 * barrier on each side of the set_bit(). The statement order is
 * significant and must be kept as is.
 * NOTE(review): the intent is presumably that a reader who sees the flag
 * set also sees the matching timestamp - confirm against the reader side.
 */
void
nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node)
{
/* Record the time before the flag becomes visible. */
node->timestamp_unavailable = jiffies;
smp_mb__before_atomic();
set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
smp_mb__after_atomic();
}
EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable);
@ -302,6 +314,7 @@ nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node)
if (time_in_range(node->timestamp_unavailable, start, end))
return true;
clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
smp_mb__after_atomic();
}
return false;
}

View File

@ -205,7 +205,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr,
}
static void
nfs_async_read_error(struct list_head *head)
nfs_async_read_error(struct list_head *head, int error)
{
struct nfs_page *req;

View File

@ -1919,7 +1919,7 @@ static int nfs_parse_devname(const char *dev_name,
/* kill possible hostname list: not supported */
comma = strchr(dev_name, ',');
if (comma != NULL && comma < end)
*comma = 0;
len = comma - dev_name;
}
if (len > maxnamlen)

View File

@ -39,6 +39,7 @@ nfs_free_unlinkdata(struct nfs_unlinkdata *data)
/**
* nfs_async_unlink_done - Sillydelete post-processing
* @task: rpc_task of the sillydelete
* @calldata: pointer to nfs_unlinkdata
*
* Do the directory attribute update.
*/
@ -54,7 +55,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
/**
* nfs_async_unlink_release - Release the sillydelete data.
* @task: rpc_task of the sillydelete
* @calldata: struct nfs_unlinkdata to release
*
* We need to call nfs_put_unlinkdata as a 'tk_release' task since the
* rpc_task would be freed too.
@ -159,8 +160,8 @@ static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nf
/**
* nfs_async_unlink - asynchronous unlinking of a file
* @dir: parent directory of dentry
* @dentry: dentry to unlink
* @dentry: parent directory of dentry
* @name: name of dentry to unlink
*/
static int
nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
@ -324,6 +325,7 @@ static const struct rpc_call_ops nfs_rename_ops = {
* @new_dir: target directory for the rename
* @old_dentry: original dentry to be renamed
* @new_dentry: dentry to which the old_dentry should be renamed
* @complete: Function to run on successful completion
*
* It's expected that valid references to the dentries and inodes are held
*/

View File

@ -26,6 +26,7 @@
#include <linux/iversion.h>
#include <linux/uaccess.h>
#include <linux/sched/mm.h>
#include "delegation.h"
#include "internal.h"
@ -712,11 +713,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct nfs_pageio_descriptor pgio;
struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS);
struct nfs_io_completion *ioc;
unsigned int pflags = memalloc_nofs_save();
int err;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
ioc = nfs_io_completion_alloc(GFP_NOFS);
if (ioc)
nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
@ -727,6 +730,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
nfs_pageio_complete(&pgio);
nfs_io_completion_put(ioc);
memalloc_nofs_restore(pflags);
if (err < 0)
goto out_err;
err = pgio.pg_error;
@ -865,7 +870,6 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
/**
* nfs_request_add_commit_list - add request to a commit list
* @req: pointer to a struct nfs_page
* @dst: commit list head
* @cinfo: holds list lock and accounting info
*
* This sets the PG_CLEAN bit, updates the cinfo count of
@ -1412,20 +1416,27 @@ static void nfs_redirty_request(struct nfs_page *req)
nfs_release_request(req);
}
static void nfs_async_write_error(struct list_head *head)
static void nfs_async_write_error(struct list_head *head, int error)
{
struct nfs_page *req;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
if (nfs_error_is_fatal(error)) {
nfs_context_set_write_error(req->wb_context, error);
if (nfs_error_is_fatal_on_server(error)) {
nfs_write_error_remove_page(req);
continue;
}
}
nfs_redirty_request(req);
}
}
static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
{
nfs_async_write_error(&hdr->pages);
nfs_async_write_error(&hdr->pages, 0);
filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset,
hdr->args.offset + hdr->args.count - 1);
}

View File

@ -60,16 +60,6 @@ struct nfs4_cb_compound_hdr {
int status;
};
/*
* Handle decode buffer overflows out-of-line.
*/
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
{
dprintk("NFS: %s prematurely hit the end of our receive buffer. "
"Remaining buffer length is %tu words.\n",
func, xdr->end - xdr->p);
}
static __be32 *xdr_encode_empty_array(__be32 *p)
{
*p++ = xdr_zero;
@ -240,7 +230,6 @@ static int decode_cb_op_status(struct xdr_stream *xdr,
*status = nfs_cb_stat_to_errno(be32_to_cpup(p));
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
out_unexpected:
dprintk("NFSD: Callback server returned operation %d but "
@ -309,7 +298,6 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
@ -437,7 +425,6 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
cb->cb_seq_status = status;
return status;
out_overflow:
print_overflow_msg(__func__, xdr);
status = -EIO;
goto out;
}

View File

@ -538,6 +538,7 @@ enum {
NFSPROC4_CLNT_OFFLOAD_CANCEL,
NFSPROC4_CLNT_LOOKUPP,
NFSPROC4_CLNT_LAYOUTERROR,
};
/* nfs41 types */

View File

@ -261,5 +261,6 @@ struct nfs_server {
#define NFS_CAP_CLONE (1U << 23)
#define NFS_CAP_COPY (1U << 24)
#define NFS_CAP_OFFLOAD_CANCEL (1U << 25)
#define NFS_CAP_LAYOUTERROR (1U << 26)
#endif

View File

@ -164,6 +164,16 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
list_add_tail(&req->wb_list, head);
}
/**
* nfs_list_move_request - Move a request to a new list
* @req: request
* @head: head of list into which to insert the request.
*
* Moves the request's wb_list entry with list_move_tail(), so the
* request ends up as the last entry on @head. This replaces a separate
* remove-then-add pair with a single call.
*/
static inline void
nfs_list_move_request(struct nfs_page *req, struct list_head *head)
{
list_move_tail(&req->wb_list, head);
}
/**
* nfs_list_remove_request - Remove a request from its wb_list

View File

@ -383,6 +383,41 @@ struct nfs42_layoutstat_data {
struct nfs42_layoutstat_res res;
};
struct nfs42_device_error {
struct nfs4_deviceid dev_id;
int status;
enum nfs_opnum4 opnum;
};
/*
 * One layout error record: an offset/length pair, a stateid, and the
 * device error attached to it.
 * NOTE(review): offset/length presumably describe the affected byte
 * range of the layout - confirm against the encoder.
 */
struct nfs42_layout_error {
__u64 offset;
__u64 length;
nfs4_stateid stateid;
/*
 * Deliberately a fixed one-element array, not a flexible array member:
 * struct nfs42_layouterror_args holds an array of these structs, which
 * would not be allowed if this struct ended in a flexible array.
 */
struct nfs42_device_error errors[1];
};
#define NFS42_LAYOUTERROR_MAX 5
struct nfs42_layouterror_args {
struct nfs4_sequence_args seq_args;
struct inode *inode;
unsigned int num_errors;
struct nfs42_layout_error errors[NFS42_LAYOUTERROR_MAX];
};
struct nfs42_layouterror_res {
struct nfs4_sequence_res seq_res;
unsigned int num_errors;
int rpc_status;
};
struct nfs42_layouterror_data {
struct nfs42_layouterror_args args;
struct nfs42_layouterror_res res;
struct inode *inode;
struct pnfs_layout_segment *lseg;
};
struct nfs42_clone_args {
struct nfs4_sequence_args seq_args;
struct nfs_fh *src_fh;
@ -1549,7 +1584,7 @@ struct nfs_commit_data {
};
struct nfs_pgio_completion_ops {
void (*error_cleanup)(struct list_head *head);
void (*error_cleanup)(struct list_head *head, int);
void (*init_hdr)(struct nfs_pgio_header *hdr);
void (*completion)(struct nfs_pgio_header *hdr);
void (*reschedule_io)(struct nfs_pgio_header *hdr);

View File

@ -74,14 +74,12 @@ struct rpc_cred_cache;
struct rpc_authops;
struct rpc_auth {
unsigned int au_cslack; /* call cred size estimate */
/* guess at number of u32's auth adds before
* reply data; normally the verifier size: */
unsigned int au_rslack;
/* for gss, used to calculate au_rslack: */
unsigned int au_verfsize;
unsigned int au_rslack; /* reply cred size estimate */
unsigned int au_verfsize; /* size of reply verifier */
unsigned int au_ralign; /* words before UL header */
unsigned int au_flags; /* various flags */
const struct rpc_authops *au_ops; /* operations */
unsigned int au_flags;
const struct rpc_authops *au_ops;
rpc_authflavor_t au_flavor; /* pseudoflavor (note may
* differ from the flavor in
* au_ops->au_flavor in gss
@ -131,13 +129,15 @@ struct rpc_credops {
void (*crdestroy)(struct rpc_cred *);
int (*crmatch)(struct auth_cred *, struct rpc_cred *, int);
__be32 * (*crmarshal)(struct rpc_task *, __be32 *);
int (*crmarshal)(struct rpc_task *task,
struct xdr_stream *xdr);
int (*crrefresh)(struct rpc_task *);
__be32 * (*crvalidate)(struct rpc_task *, __be32 *);
int (*crwrap_req)(struct rpc_task *, kxdreproc_t,
void *, __be32 *, void *);
int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
void *, __be32 *, void *);
int (*crvalidate)(struct rpc_task *task,
struct xdr_stream *xdr);
int (*crwrap_req)(struct rpc_task *task,
struct xdr_stream *xdr);
int (*crunwrap_resp)(struct rpc_task *task,
struct xdr_stream *xdr);
int (*crkey_timeout)(struct rpc_cred *);
char * (*crstringify_acceptor)(struct rpc_cred *);
bool (*crneed_reencode)(struct rpc_task *);
@ -165,10 +165,18 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *
void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
void put_rpccred(struct rpc_cred *);
__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *);
__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *);
int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
int rpcauth_marshcred(struct rpc_task *task,
struct xdr_stream *xdr);
int rpcauth_checkverf(struct rpc_task *task,
struct xdr_stream *xdr);
int rpcauth_wrap_req_encode(struct rpc_task *task,
struct xdr_stream *xdr);
int rpcauth_wrap_req(struct rpc_task *task,
struct xdr_stream *xdr);
int rpcauth_unwrap_resp_decode(struct rpc_task *task,
struct xdr_stream *xdr);
int rpcauth_unwrap_resp(struct rpc_task *task,
struct xdr_stream *xdr);
bool rpcauth_xmit_need_reencode(struct rpc_task *task);
int rpcauth_refreshcred(struct rpc_task *);
void rpcauth_invalcred(struct rpc_task *);

View File

@ -169,6 +169,9 @@ int rpcb_v4_register(struct net *net, const u32 program,
const char *netid);
void rpcb_getport_async(struct rpc_task *);
void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
unsigned int base, unsigned int len,
unsigned int hdrsize);
void rpc_call_start(struct rpc_task *);
int rpc_call_async(struct rpc_clnt *clnt,
const struct rpc_message *msg, int flags,

View File

@ -1,4 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Dumb way to share this static piece of information with nfsd
* Define the string that exports the set of kernel-supported
* Kerberos enctypes. This list is sent via upcall to gssd, and
* is also exposed via the nfsd /proc API. The consumers generally
* treat this as an ordered list, where the first item in the list
* is the most preferred.
*/
#ifndef _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H
#define _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H
#ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
/*
* NB: This list includes encryption types that were deprecated
* by RFC 8429 (DES3_CBC_SHA1 and ARCFOUR_HMAC).
*
* ENCTYPE_AES256_CTS_HMAC_SHA1_96
* ENCTYPE_AES128_CTS_HMAC_SHA1_96
* ENCTYPE_DES3_CBC_SHA1
* ENCTYPE_ARCFOUR_HMAC
*/
#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23"
#else /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
/*
* NB: This list includes encryption types that were deprecated
* by RFC 8429 and RFC 6649.
*
* ENCTYPE_AES256_CTS_HMAC_SHA1_96
* ENCTYPE_AES128_CTS_HMAC_SHA1_96
* ENCTYPE_DES3_CBC_SHA1
* ENCTYPE_ARCFOUR_HMAC
* ENCTYPE_DES_CBC_MD5
* ENCTYPE_DES_CBC_CRC
* ENCTYPE_DES_CBC_MD4
*/
#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2"
#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
#endif /* _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H */

View File

@ -97,6 +97,7 @@ typedef void (*rpc_action)(struct rpc_task *);
struct rpc_call_ops {
void (*rpc_call_prepare)(struct rpc_task *, void *);
void (*rpc_call_prepare_transmit)(struct rpc_task *, void *);
void (*rpc_call_done)(struct rpc_task *, void *);
void (*rpc_count_stats)(struct rpc_task *, void *);
void (*rpc_release)(void *);
@ -303,4 +304,12 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
}
#endif /* CONFIG_SUNRPC_SWAP */
/*
 * rpc_task_need_resched - test two conditions on @task
 *
 * Returns true when RPC_IS_QUEUED(@task) is non-zero or when
 * @task->tk_callback is set; returns false otherwise.
 */
static inline bool
rpc_task_need_resched(const struct rpc_task *task)
{
	return RPC_IS_QUEUED(task) || task->tk_callback;
}
#endif /* _LINUX_SUNRPC_SCHED_H_ */

View File

@ -87,6 +87,16 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
#define xdr_one cpu_to_be32(1)
#define xdr_two cpu_to_be32(2)
#define rpc_auth_null cpu_to_be32(RPC_AUTH_NULL)
#define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX)
#define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT)
#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS)
#define rpc_call cpu_to_be32(RPC_CALL)
#define rpc_reply cpu_to_be32(RPC_REPLY)
#define rpc_msg_accepted cpu_to_be32(RPC_MSG_ACCEPTED)
#define rpc_success cpu_to_be32(RPC_SUCCESS)
#define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL)
#define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH)
@ -95,6 +105,9 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
#define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR)
#define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY)
#define rpc_mismatch cpu_to_be32(RPC_MISMATCH)
#define rpc_auth_error cpu_to_be32(RPC_AUTH_ERROR)
#define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK)
#define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED)
#define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED)
@ -103,7 +116,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
#define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK)
#define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM)
#define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM)
#define rpc_autherr_oldseqnum cpu_to_be32(101)
/*
* Miscellaneous XDR helper functions
@ -167,7 +179,6 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
extern void xdr_shift_buf(struct xdr_buf *, size_t);
extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
@ -217,6 +228,8 @@ struct xdr_stream {
struct kvec scratch; /* Scratch buffer */
struct page **page_ptr; /* pointer to the current page */
unsigned int nwords; /* Remaining decode buffer length */
struct rpc_rqst *rqst; /* For debugging */
};
/*
@ -227,7 +240,8 @@ typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
void *obj);
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
__be32 *p, struct rpc_rqst *rqst);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern void xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
@ -235,7 +249,8 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len);
extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
__be32 *p, struct rpc_rqst *rqst);
extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
struct page **pages, unsigned int len);
extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);

View File

@ -196,8 +196,6 @@ struct rpc_xprt {
size_t max_payload; /* largest RPC payload size,
in bytes */
unsigned int tsh_size; /* size of transport specific
header */
struct rpc_wait_queue binding; /* requests waiting on rpcbind */
struct rpc_wait_queue sending; /* requests waiting to send */
@ -362,11 +360,6 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size,
unsigned int max_req);
void xprt_free(struct rpc_xprt *);
static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
{
return p + xprt->tsh_size;
}
static inline int
xprt_enable_swap(struct rpc_xprt *xprt)
{

View File

@ -26,6 +26,7 @@ struct sock_xprt {
*/
struct socket * sock;
struct sock * inet;
struct file * file;
/*
* State of TCP reply receive

View File

@ -0,0 +1,361 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2018 Oracle. All rights reserved.
*
* Trace point definitions for the "rpcgss" subsystem.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rpcgss

/*
 * Include guard. The macro tested here must be the same one that is
 * defined on the next line; testing another header's guard would leave
 * this header unprotected against repeated inclusion and would drop
 * these events whenever that other header had been included first.
 * TRACE_HEADER_MULTI_READ deliberately lets the header be read again.
 */
#if !defined(_TRACE_RPCGSS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_RPCGSS_H

#include <linux/tracepoint.h>
/**
** GSS-API related trace events
**/
/*
 * Export every GSS_S_* value used by show_gss_status() to user space
 * via TRACE_DEFINE_ENUM().
 */
TRACE_DEFINE_ENUM(GSS_S_BAD_MECH);
TRACE_DEFINE_ENUM(GSS_S_BAD_NAME);
TRACE_DEFINE_ENUM(GSS_S_BAD_NAMETYPE);
TRACE_DEFINE_ENUM(GSS_S_BAD_BINDINGS);
TRACE_DEFINE_ENUM(GSS_S_BAD_STATUS);
TRACE_DEFINE_ENUM(GSS_S_BAD_SIG);
TRACE_DEFINE_ENUM(GSS_S_NO_CRED);
TRACE_DEFINE_ENUM(GSS_S_NO_CONTEXT);
TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_TOKEN);
TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_CREDENTIAL);
TRACE_DEFINE_ENUM(GSS_S_CREDENTIALS_EXPIRED);
TRACE_DEFINE_ENUM(GSS_S_CONTEXT_EXPIRED);
TRACE_DEFINE_ENUM(GSS_S_FAILURE);
TRACE_DEFINE_ENUM(GSS_S_BAD_QOP);
TRACE_DEFINE_ENUM(GSS_S_UNAUTHORIZED);
TRACE_DEFINE_ENUM(GSS_S_UNAVAILABLE);
TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_ELEMENT);
TRACE_DEFINE_ENUM(GSS_S_NAME_NOT_MN);
TRACE_DEFINE_ENUM(GSS_S_CONTINUE_NEEDED);
TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_TOKEN);
TRACE_DEFINE_ENUM(GSS_S_OLD_TOKEN);
TRACE_DEFINE_ENUM(GSS_S_UNSEQ_TOKEN);
TRACE_DEFINE_ENUM(GSS_S_GAP_TOKEN);
/*
 * show_gss_status(x) - format @x with __print_flags() as a
 * "|"-separated list of the GSS_S_* names in the table below.
 */
#define show_gss_status(x) \
__print_flags(x, "|", \
{ GSS_S_BAD_MECH, "GSS_S_BAD_MECH" }, \
{ GSS_S_BAD_NAME, "GSS_S_BAD_NAME" }, \
{ GSS_S_BAD_NAMETYPE, "GSS_S_BAD_NAMETYPE" }, \
{ GSS_S_BAD_BINDINGS, "GSS_S_BAD_BINDINGS" }, \
{ GSS_S_BAD_STATUS, "GSS_S_BAD_STATUS" }, \
{ GSS_S_BAD_SIG, "GSS_S_BAD_SIG" }, \
{ GSS_S_NO_CRED, "GSS_S_NO_CRED" }, \
{ GSS_S_NO_CONTEXT, "GSS_S_NO_CONTEXT" }, \
{ GSS_S_DEFECTIVE_TOKEN, "GSS_S_DEFECTIVE_TOKEN" }, \
{ GSS_S_DEFECTIVE_CREDENTIAL, "GSS_S_DEFECTIVE_CREDENTIAL" }, \
{ GSS_S_CREDENTIALS_EXPIRED, "GSS_S_CREDENTIALS_EXPIRED" }, \
{ GSS_S_CONTEXT_EXPIRED, "GSS_S_CONTEXT_EXPIRED" }, \
{ GSS_S_FAILURE, "GSS_S_FAILURE" }, \
{ GSS_S_BAD_QOP, "GSS_S_BAD_QOP" }, \
{ GSS_S_UNAUTHORIZED, "GSS_S_UNAUTHORIZED" }, \
{ GSS_S_UNAVAILABLE, "GSS_S_UNAVAILABLE" }, \
{ GSS_S_DUPLICATE_ELEMENT, "GSS_S_DUPLICATE_ELEMENT" }, \
{ GSS_S_NAME_NOT_MN, "GSS_S_NAME_NOT_MN" }, \
{ GSS_S_CONTINUE_NEEDED, "GSS_S_CONTINUE_NEEDED" }, \
{ GSS_S_DUPLICATE_TOKEN, "GSS_S_DUPLICATE_TOKEN" }, \
{ GSS_S_OLD_TOKEN, "GSS_S_OLD_TOKEN" }, \
{ GSS_S_UNSEQ_TOKEN, "GSS_S_UNSEQ_TOKEN" }, \
{ GSS_S_GAP_TOKEN, "GSS_S_GAP_TOKEN" })
/*
 * Event class rpcgss_gssapi_event: takes an RPC task and a GSS major
 * status. Each record stores the task's tk_pid, the cl_clid of the
 * task's client, and the major status. The status is printed as
 * "GSS_S_COMPLETE" when it is zero and through show_gss_status()
 * otherwise.
 */
DECLARE_EVENT_CLASS(rpcgss_gssapi_event,
TP_PROTO(
const struct rpc_task *task,
u32 maj_stat
),
TP_ARGS(task, maj_stat),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, maj_stat)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->maj_stat = maj_stat;
),
TP_printk("task:%u@%u maj_stat=%s",
__entry->task_id, __entry->client_id,
__entry->maj_stat == 0 ?
"GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat))
);
/*
 * DEFINE_GSSAPI_EVENT(name) - define the tracepoint rpcgss_<name> as an
 * instance of the rpcgss_gssapi_event class, taking (task, maj_stat).
 */
#define DEFINE_GSSAPI_EVENT(name) \
DEFINE_EVENT(rpcgss_gssapi_event, rpcgss_##name, \
TP_PROTO( \
const struct rpc_task *task, \
u32 maj_stat \
), \
TP_ARGS(task, maj_stat))
/*
 * rpcgss_import_ctx: stores the integer @status supplied by the caller
 * and prints it as "status=%d".
 */
TRACE_EVENT(rpcgss_import_ctx,
TP_PROTO(
int status
),
TP_ARGS(status),
TP_STRUCT__entry(
__field(int, status)
),
TP_fast_assign(
__entry->status = status;
),
TP_printk("status=%d", __entry->status)
);
/*
 * Tracepoints rpcgss_get_mic, rpcgss_verify_mic, rpcgss_wrap and
 * rpcgss_unwrap, each an instance of the rpcgss_gssapi_event class.
 */
DEFINE_GSSAPI_EVENT(get_mic);
DEFINE_GSSAPI_EVENT(verify_mic);
DEFINE_GSSAPI_EVENT(wrap);
DEFINE_GSSAPI_EVENT(unwrap);
/**
** GSS auth unwrap failures
**/
/*
 * rpcgss_unwrap_failed: identifies the task only. The record holds the
 * task's tk_pid and the cl_clid of its client, printed as
 * "task:<pid>@<clid>".
 */
TRACE_EVENT(rpcgss_unwrap_failed,
TP_PROTO(
const struct rpc_task *task
),
TP_ARGS(task),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
),
TP_printk("task:%u@%u", __entry->task_id, __entry->client_id)
);
/*
 * rpcgss_bad_seqno: records the task's tk_pid, its client's cl_clid,
 * and the caller-supplied @expected and @received sequence numbers.
 */
TRACE_EVENT(rpcgss_bad_seqno,
TP_PROTO(
const struct rpc_task *task,
u32 expected,
u32 received
),
TP_ARGS(task, expected, received),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, expected)
__field(u32, received)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->expected = expected;
__entry->received = received;
),
TP_printk("task:%u@%u expected seqno %u, received seqno %u",
__entry->task_id, __entry->client_id,
__entry->expected, __entry->received)
);
/*
 * rpcgss_seqno: records the task's tk_pid and client cl_clid together
 * with two fields read from task->tk_rqstp: rq_xid (converted with
 * be32_to_cpu()) and rq_seqno.
 */
TRACE_EVENT(rpcgss_seqno,
TP_PROTO(
const struct rpc_task *task
),
TP_ARGS(task),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
__field(u32, seqno)
),
TP_fast_assign(
const struct rpc_rqst *rqst = task->tk_rqstp;
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->seqno = rqst->rq_seqno;
),
TP_printk("task:%u@%u xid=0x%08x seqno=%u",
__entry->task_id, __entry->client_id,
__entry->xid, __entry->seqno)
);
/*
 * rpcgss_need_reencode: records the task's tk_pid and client cl_clid,
 * the rq_xid and rq_seqno of task->tk_rqstp, and the caller-supplied
 * @seq_xmit and @ret. The output ends in "reencode needed" when @ret
 * is true and "reencode unneeded" when it is false.
 */
TRACE_EVENT(rpcgss_need_reencode,
TP_PROTO(
const struct rpc_task *task,
u32 seq_xmit,
bool ret
),
TP_ARGS(task, seq_xmit, ret),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
__field(u32, seq_xmit)
__field(u32, seqno)
__field(bool, ret)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
__entry->seq_xmit = seq_xmit;
__entry->seqno = task->tk_rqstp->rq_seqno;
__entry->ret = ret;
),
TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded",
__entry->task_id, __entry->client_id,
__entry->xid, __entry->seqno, __entry->seq_xmit,
__entry->ret ? "" : "un")
);
/**
** gssd upcall related trace events
**/
/*
 * rpcgss_upcall_msg: copies the string @buf into the record and prints
 * it as msg='<buf>'.
 */
TRACE_EVENT(rpcgss_upcall_msg,
TP_PROTO(
const char *buf
),
TP_ARGS(buf),
TP_STRUCT__entry(
__string(msg, buf)
),
TP_fast_assign(
__assign_str(msg, buf)
),
TP_printk("msg='%s'", __get_str(msg))
);
/*
 * rpcgss_upcall_result: records the caller-supplied @uid and @result
 * and prints them as "for uid <uid>, result=<result>".
 */
TRACE_EVENT(rpcgss_upcall_result,
TP_PROTO(
u32 uid,
int result
),
TP_ARGS(uid, result),
TP_STRUCT__entry(
__field(u32, uid)
__field(int, result)
),
TP_fast_assign(
__entry->uid = uid;
__entry->result = result;
),
TP_printk("for uid %u, result=%d", __entry->uid, __entry->result)
);
/*
 * rpcgss_context: records the caller-supplied @expiry, @now and
 * @timeout values plus the acceptor name, given as @len bytes at @data.
 *
 * The acceptor is a counted byte buffer, not a C string, so the record
 * field is sized from @len (plus one byte for a terminator) and filled
 * with memcpy(). Sizing it with __string() would run strlen() over
 * @data, and strncpy() bounded by @len could then either leave the copy
 * unterminated or write past the space that was reserved.
 * NOTE(review): assumes @data is readable for @len bytes whenever
 * @len is non-zero - confirm against the caller.
 */
TRACE_EVENT(rpcgss_context,
	TP_PROTO(
		unsigned long expiry,
		unsigned long now,
		unsigned int timeout,
		unsigned int len,
		const u8 *data
	),

	TP_ARGS(expiry, now, timeout, len, data),

	TP_STRUCT__entry(
		__field(unsigned long, expiry)
		__field(unsigned long, now)
		__field(unsigned int, timeout)
		__field(int, len)
		__dynamic_array(char, acceptor, len + 1)
	),

	TP_fast_assign(
		__entry->expiry = expiry;
		__entry->now = now;
		__entry->timeout = timeout;
		__entry->len = len;
		/* Skip the copy for an empty acceptor; @data may be unset. */
		if (len)
			memcpy(__get_str(acceptor), data, len);
		__get_str(acceptor)[len] = '\0';
	),

	/* "%.*s" bounds the output by the stored length. */
	TP_printk("gc_expiry=%lu now=%lu timeout=%u acceptor=%.*s",
		__entry->expiry, __entry->now, __entry->timeout,
		__entry->len, __get_str(acceptor))
);
/**
** Miscellaneous events
*/
/* Export the three pseudoflavor values to user space. */
TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5);
TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I);
TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P);
/*
 * show_pseudoflavor(x) - map @x to the name of the matching
 * RPC_AUTH_GSS_KRB5* constant with __print_symbolic().
 */
#define show_pseudoflavor(x) \
__print_symbolic(x, \
{ RPC_AUTH_GSS_KRB5, "RPC_AUTH_GSS_KRB5" }, \
{ RPC_AUTH_GSS_KRB5I, "RPC_AUTH_GSS_KRB5I" }, \
{ RPC_AUTH_GSS_KRB5P, "RPC_AUTH_GSS_KRB5P" })
/*
 * rpcgss_createauth: records the caller-supplied @flavor and @error.
 * The flavor is printed by name through show_pseudoflavor(), the error
 * as a signed integer.
 */
TRACE_EVENT(rpcgss_createauth,
TP_PROTO(
unsigned int flavor,
int error
),
TP_ARGS(flavor, error),
TP_STRUCT__entry(
__field(unsigned int, flavor)
__field(int, error)
),
TP_fast_assign(
__entry->flavor = flavor;
__entry->error = error;
),
TP_printk("flavor=%s error=%d",
show_pseudoflavor(__entry->flavor), __entry->error)
);
#endif /* _TRACE_RPCGSS_H */
#include <trace/define_trace.h>

View File

@ -521,12 +521,18 @@ TRACE_EVENT(xprtrdma_post_send,
TP_STRUCT__entry(
__field(const void *, req)
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(int, num_sge)
__field(int, signaled)
__field(int, status)
),
TP_fast_assign(
const struct rpc_rqst *rqst = &req->rl_slot;
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->req = req;
__entry->num_sge = req->rl_sendctx->sc_wr.num_sge;
__entry->signaled = req->rl_sendctx->sc_wr.send_flags &
@ -534,9 +540,11 @@ TRACE_EVENT(xprtrdma_post_send,
__entry->status = status;
),
TP_printk("req=%p, %d SGEs%s, status=%d",
TP_printk("task:%u@%u req=%p (%d SGE%s) %sstatus=%d",
__entry->task_id, __entry->client_id,
__entry->req, __entry->num_sge,
(__entry->signaled ? ", signaled" : ""),
(__entry->num_sge == 1 ? "" : "s"),
(__entry->signaled ? "signaled " : ""),
__entry->status
)
);

View File

@ -77,6 +77,50 @@ TRACE_EVENT(rpc_request,
)
);
/*
 * Export the task flag values used by rpc_show_task_flags() to user
 * space via TRACE_DEFINE_ENUM().
 */
TRACE_DEFINE_ENUM(RPC_TASK_ASYNC);
TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
TRACE_DEFINE_ENUM(RPC_TASK_SENT);
TRACE_DEFINE_ENUM(RPC_TASK_TIMEOUT);
TRACE_DEFINE_ENUM(RPC_TASK_NOCONNECT);
TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
/*
 * rpc_show_task_flags(flags) - format @flags with __print_flags() as a
 * "|"-separated list of the short names in the table below.
 */
#define rpc_show_task_flags(flags) \
__print_flags(flags, "|", \
{ RPC_TASK_ASYNC, "ASYNC" }, \
{ RPC_TASK_SWAPPER, "SWAPPER" }, \
{ RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \
{ RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \
{ RPC_TASK_DYNAMIC, "DYNAMIC" }, \
{ RPC_TASK_KILLED, "KILLED" }, \
{ RPC_TASK_SOFT, "SOFT" }, \
{ RPC_TASK_SOFTCONN, "SOFTCONN" }, \
{ RPC_TASK_SENT, "SENT" }, \
{ RPC_TASK_TIMEOUT, "TIMEOUT" }, \
{ RPC_TASK_NOCONNECT, "NOCONNECT" }, \
{ RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" })
/*
 * Export the run-state constants used by rpc_show_runstate() to user
 * space via TRACE_DEFINE_ENUM().
 */
TRACE_DEFINE_ENUM(RPC_TASK_RUNNING);
TRACE_DEFINE_ENUM(RPC_TASK_QUEUED);
TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
/*
 * rpc_show_runstate(flags) - format @flags with __print_flags(). Each
 * constant above is used as a bit number here (1UL << constant), not as
 * a mask.
 */
#define rpc_show_runstate(flags) \
__print_flags(flags, "|", \
{ (1UL << RPC_TASK_RUNNING), "RUNNING" }, \
{ (1UL << RPC_TASK_QUEUED), "QUEUED" }, \
{ (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \
{ (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \
{ (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \
{ (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
DECLARE_EVENT_CLASS(rpc_task_running,
TP_PROTO(const struct rpc_task *task, const void *action),
@ -102,10 +146,10 @@ DECLARE_EVENT_CLASS(rpc_task_running,
__entry->flags = task->tk_flags;
),
TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf",
TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%pf",
__entry->task_id, __entry->client_id,
__entry->flags,
__entry->runstate,
rpc_show_task_flags(__entry->flags),
rpc_show_runstate(__entry->runstate),
__entry->status,
__entry->action
)
@ -149,10 +193,10 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
__assign_str(q_name, rpc_qname(q));
),
TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s",
__entry->task_id, __entry->client_id,
__entry->flags,
__entry->runstate,
rpc_show_task_flags(__entry->flags),
rpc_show_runstate(__entry->runstate),
__entry->status,
__entry->timeout,
__get_str(q_name)
@ -169,6 +213,87 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
DEFINE_RPC_QUEUED_EVENT(sleep);
DEFINE_RPC_QUEUED_EVENT(wakeup);
/*
 * Event class rpc_failure: identifies the task only. The record holds
 * the task's tk_pid and the cl_clid of its client.
 */
DECLARE_EVENT_CLASS(rpc_failure,
TP_PROTO(const struct rpc_task *task),
TP_ARGS(task),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
),
TP_printk("task:%u@%u",
__entry->task_id, __entry->client_id)
);
/*
 * DEFINE_RPC_FAILURE(name) - define the tracepoint rpc_bad_<name> as an
 * instance of the rpc_failure class, taking (task).
 */
#define DEFINE_RPC_FAILURE(name) \
DEFINE_EVENT(rpc_failure, rpc_bad_##name, \
TP_PROTO( \
const struct rpc_task *task \
), \
TP_ARGS(task))
/* Tracepoints rpc_bad_callhdr and rpc_bad_verifier. */
DEFINE_RPC_FAILURE(callhdr);
DEFINE_RPC_FAILURE(verifier);
/*
 * Event class rpc_reply_event: takes an RPC task and records
 *   - the task's tk_pid and its client's cl_clid,
 *   - rq_xid of task->tk_rqstp, converted with be32_to_cpu(),
 *   - the client's program name and cl_vers,
 *   - the procedure name returned by rpc_proc_name(task),
 *   - the server name of task->tk_xprt.
 *
 * client_id is unsigned, so it is printed with %u like the other
 * "task:%u@%u" events defined alongside this one.
 */
DECLARE_EVENT_CLASS(rpc_reply_event,
	TP_PROTO(
		const struct rpc_task *task
	),

	TP_ARGS(task),

	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(u32, xid)
		__string(progname, task->tk_client->cl_program->name)
		__field(u32, version)
		__string(procname, rpc_proc_name(task))
		__string(servername, task->tk_xprt->servername)
	),

	TP_fast_assign(
		__entry->task_id = task->tk_pid;
		__entry->client_id = task->tk_client->cl_clid;
		__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
		__assign_str(progname, task->tk_client->cl_program->name);
		__entry->version = task->tk_client->cl_vers;
		__assign_str(procname, rpc_proc_name(task));
		__assign_str(servername, task->tk_xprt->servername);
	),

	TP_printk("task:%u@%u server=%s xid=0x%08x %sv%d %s",
		__entry->task_id, __entry->client_id, __get_str(servername),
		__entry->xid, __get_str(progname), __entry->version,
		__get_str(procname))
);
/*
 * DEFINE_RPC_REPLY_EVENT(name) - define a tracepoint of the
 * rpc_reply_event class, taking (task). Note that the generated
 * tracepoint name is rpc__<name>, with two underscores.
 */
#define DEFINE_RPC_REPLY_EVENT(name) \
DEFINE_EVENT(rpc_reply_event, rpc__##name, \
TP_PROTO( \
const struct rpc_task *task \
), \
TP_ARGS(task))
/* One tracepoint per name listed below. */
DEFINE_RPC_REPLY_EVENT(prog_unavail);
DEFINE_RPC_REPLY_EVENT(prog_mismatch);
DEFINE_RPC_REPLY_EVENT(proc_unavail);
DEFINE_RPC_REPLY_EVENT(garbage_args);
DEFINE_RPC_REPLY_EVENT(unparsable);
DEFINE_RPC_REPLY_EVENT(mismatch);
DEFINE_RPC_REPLY_EVENT(stale_creds);
DEFINE_RPC_REPLY_EVENT(bad_creds);
DEFINE_RPC_REPLY_EVENT(auth_tooweak);
TRACE_EVENT(rpc_stats_latency,
TP_PROTO(
@ -210,6 +335,169 @@ TRACE_EVENT(rpc_stats_latency,
__entry->backlog, __entry->rtt, __entry->execute)
);
/*
 * rpc_xdr_overflow: records the state of an xdr_stream together with
 * the number of bytes that were requested from it.
 *
 * @xdr:       stream being reported; xdr->rqst may be NULL
 * @requested: byte count supplied by the caller
 *
 * When xdr->rqst is set, the record identifies the owning task (tk_pid,
 * client cl_clid, program name, cl_vers, procedure name). When it is
 * NULL, the ids and version are zero and both names are "unknown".
 * The stream's p/end pointers and the head, page and tail geometry of
 * xdr->buf are recorded in either case.
 */
TRACE_EVENT(rpc_xdr_overflow,
	TP_PROTO(
		const struct xdr_stream *xdr,
		size_t requested
	),

	TP_ARGS(xdr, requested),

	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(int, version)
		__field(size_t, requested)
		__field(const void *, end)
		__field(const void *, p)
		__field(const void *, head_base)
		__field(size_t, head_len)
		__field(const void *, tail_base)
		__field(size_t, tail_len)
		__field(unsigned int, page_len)
		__field(unsigned int, len)
		/*
		 * The __string() source expression is evaluated as well (to
		 * size the field), so it needs the same NULL guard on
		 * xdr->rqst as TP_fast_assign below.
		 */
		__string(progname, xdr->rqst ?
			 xdr->rqst->rq_task->tk_client->cl_program->name :
			 "unknown")
		__string(procedure, xdr->rqst ?
			 xdr->rqst->rq_task->tk_msg.rpc_proc->p_name :
			 "unknown")
	),

	TP_fast_assign(
		if (xdr->rqst) {
			const struct rpc_task *task = xdr->rqst->rq_task;

			__entry->task_id = task->tk_pid;
			__entry->client_id = task->tk_client->cl_clid;
			__assign_str(progname,
				     task->tk_client->cl_program->name);
			__entry->version = task->tk_client->cl_vers;
			__assign_str(procedure, task->tk_msg.rpc_proc->p_name);
		} else {
			__entry->task_id = 0;
			__entry->client_id = 0;
			__assign_str(progname, "unknown");
			__entry->version = 0;
			__assign_str(procedure, "unknown");
		}
		__entry->requested = requested;
		__entry->end = xdr->end;
		__entry->p = xdr->p;
		/* One statement per field; no comma-operator chaining. */
		__entry->head_base = xdr->buf->head[0].iov_base;
		__entry->head_len = xdr->buf->head[0].iov_len;
		__entry->page_len = xdr->buf->page_len;
		__entry->tail_base = xdr->buf->tail[0].iov_base;
		__entry->tail_len = xdr->buf->tail[0].iov_len;
		__entry->len = xdr->buf->len;
	),

	TP_printk(
		"task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
		__entry->task_id, __entry->client_id,
		__get_str(progname), __entry->version, __get_str(procedure),
		__entry->requested, __entry->p, __entry->end,
		__entry->head_base, __entry->head_len,
		__entry->page_len,
		__entry->tail_base, __entry->tail_len,
		__entry->len
	)
);
/*
 * rpc_xdr_alignment: records the caller-supplied @offset and @copied
 * values for an xdr_stream, the task that owns it (reached through
 * xdr->rqst->rq_task), and the head, page and tail geometry of
 * xdr->buf.
 */
TRACE_EVENT(rpc_xdr_alignment,
	TP_PROTO(
		const struct xdr_stream *xdr,
		size_t offset,
		unsigned int copied
	),

	TP_ARGS(xdr, offset, copied),

	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(int, version)
		__field(size_t, offset)
		__field(unsigned int, copied)
		__field(const void *, head_base)
		__field(size_t, head_len)
		__field(const void *, tail_base)
		__field(size_t, tail_len)
		__field(unsigned int, page_len)
		__field(unsigned int, len)
		__string(progname,
			 xdr->rqst->rq_task->tk_client->cl_program->name)
		__string(procedure,
			 xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
	),

	TP_fast_assign(
		/* Caller-supplied values. */
		__entry->offset = offset;
		__entry->copied = copied;

		/* Owning task, program and procedure. */
		__entry->task_id = xdr->rqst->rq_task->tk_pid;
		__entry->client_id = xdr->rqst->rq_task->tk_client->cl_clid;
		__entry->version = xdr->rqst->rq_task->tk_client->cl_vers;
		__assign_str(progname,
			     xdr->rqst->rq_task->tk_client->cl_program->name);
		__assign_str(procedure,
			     xdr->rqst->rq_task->tk_msg.rpc_proc->p_name);

		/* Buffer geometry. */
		__entry->head_base = xdr->buf->head[0].iov_base;
		__entry->head_len = xdr->buf->head[0].iov_len;
		__entry->page_len = xdr->buf->page_len;
		__entry->tail_base = xdr->buf->tail[0].iov_base;
		__entry->tail_len = xdr->buf->tail[0].iov_len;
		__entry->len = xdr->buf->len;
	),

	TP_printk(
		"task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
		__entry->task_id, __entry->client_id,
		__get_str(progname), __entry->version, __get_str(procedure),
		__entry->offset, __entry->copied,
		__entry->head_base, __entry->head_len,
		__entry->page_len,
		__entry->tail_base, __entry->tail_len,
		__entry->len
	)
);
/*
 * rpc_reply_pages: records the tk_pid and client cl_clid of
 * req->rq_task, plus the head base/length, page length and tail
 * base/length of req->rq_rcv_buf.
 */
TRACE_EVENT(rpc_reply_pages,
TP_PROTO(
const struct rpc_rqst *req
),
TP_ARGS(req),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, head_base)
__field(size_t, head_len)
__field(const void *, tail_base)
__field(size_t, tail_len)
__field(unsigned int, page_len)
),
TP_fast_assign(
__entry->task_id = req->rq_task->tk_pid;
__entry->client_id = req->rq_task->tk_client->cl_clid;
__entry->head_base = req->rq_rcv_buf.head[0].iov_base;
__entry->head_len = req->rq_rcv_buf.head[0].iov_len;
__entry->page_len = req->rq_rcv_buf.page_len;
__entry->tail_base = req->rq_rcv_buf.tail[0].iov_base;
__entry->tail_len = req->rq_rcv_buf.tail[0].iov_len;
),
TP_printk(
"task:%u@%u xdr=[%p,%zu]/%u/[%p,%zu]\n",
__entry->task_id, __entry->client_id,
__entry->head_base, __entry->head_len,
__entry->page_len,
__entry->tail_base, __entry->tail_len
)
);
/*
* First define the enums in the below macros to be exported to userspace
* via TRACE_DEFINE_ENUM().
@ -404,9 +692,68 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
DEFINE_RPC_XPRT_EVENT(timer);
DEFINE_RPC_XPRT_EVENT(lookup_rqst);
DEFINE_RPC_XPRT_EVENT(transmit);
DEFINE_RPC_XPRT_EVENT(complete_rqst);
/*
 * xprt_transmit: records the tk_pid and client cl_clid of
 * rqst->rq_task, the request's rq_xid (converted with be32_to_cpu())
 * and rq_seqno, and the caller-supplied @status.
 */
TRACE_EVENT(xprt_transmit,
TP_PROTO(
const struct rpc_rqst *rqst,
int status
),
TP_ARGS(rqst, status),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
__field(u32, seqno)
__field(int, status)
),
TP_fast_assign(
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->seqno = rqst->rq_seqno;
__entry->status = status;
),
TP_printk(
"task:%u@%u xid=0x%08x seqno=%u status=%d",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->seqno, __entry->status)
);
/*
 * xprt_enq_xmit: records the task's tk_pid and client cl_clid, the
 * rq_xid (converted with be32_to_cpu()) and rq_seqno of
 * task->tk_rqstp, and the caller-supplied integer @stage.
 */
TRACE_EVENT(xprt_enq_xmit,
TP_PROTO(
const struct rpc_task *task,
int stage
),
TP_ARGS(task, stage),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
__field(u32, seqno)
__field(int, stage)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
__entry->seqno = task->tk_rqstp->rq_seqno;
__entry->stage = stage;
),
TP_printk(
"task:%u@%u xid=0x%08x seqno=%u stage=%d",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->seqno, __entry->stage)
);
TRACE_EVENT(xprt_ping,
TP_PROTO(const struct rpc_xprt *xprt, int status),

View File

@ -34,6 +34,22 @@ config RPCSEC_GSS_KRB5
If unsure, say Y.
# Kconfig symbols are declared without the CONFIG_ prefix; the build
# system prepends it when generating the macro that C code tests
# (here: CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES).
config SUNRPC_DISABLE_INSECURE_ENCTYPES
	bool "Secure RPC: Disable insecure Kerberos encryption types"
	depends on RPCSEC_GSS_KRB5
	default n
	help
	  Choose Y here to disable the use of deprecated encryption types
	  with the Kerberos version 5 GSS-API mechanism (RFC 1964). The
	  deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC,
	  and DES-CBC-MD4. These types were deprecated by RFC 6649 because
	  they were found to be insecure.

	  N is the default because many sites have deployed KDCs and
	  keytabs that contain only these deprecated encryption types.
	  Choosing Y prevents the use of known-insecure encryption types
	  but might result in compatibility problems.

config SUNRPC_DEBUG
bool "RPC: Enable dprintk debugging"
depends on SUNRPC && SYSCTL

View File

@ -17,9 +17,7 @@
#include <linux/sunrpc/gss_api.h>
#include <linux/spinlock.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
#include <trace/events/sunrpc.h>
#define RPC_CREDCACHE_DEFAULT_HASHBITS (4)
struct rpc_cred_cache {
@ -267,8 +265,6 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
}
}
rcu_read_unlock();
dprintk("RPC: %s returns %d\n", __func__, result);
return result;
}
EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
@ -636,9 +632,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
struct rpc_cred *ret;
const struct cred *cred = current_cred();
dprintk("RPC: looking up %s cred\n",
auth->au_ops->au_name);
memset(&acred, 0, sizeof(acred));
acred.cred = cred;
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
@ -670,8 +663,6 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
};
struct rpc_cred *ret;
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
put_cred(acred.cred);
return ret;
@ -688,8 +679,6 @@ rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags)
if (!acred.principal)
return NULL;
dprintk("RPC: %5u looking up %s machine cred\n",
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
}
@ -698,8 +687,6 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, auth->au_ops->au_name);
return rpcauth_lookupcred(auth, lookupflags);
}
@ -771,75 +758,102 @@ put_rpccred(struct rpc_cred *cred)
}
EXPORT_SYMBOL_GPL(put_rpccred);
__be32 *
rpcauth_marshcred(struct rpc_task *task, __be32 *p)
/**
* rpcauth_marshcred - Append RPC credential to end of @xdr
* @task: controlling RPC task
* @xdr: xdr_stream containing initial portion of RPC Call header
*
* On success, an appropriate verifier is added to @xdr, @xdr is
* updated to point past the verifier, and zero is returned.
* Otherwise, @xdr is in an undefined state and a negative errno
* is returned.
*/
int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
dprintk("RPC: %5u marshaling %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
return cred->cr_ops->crmarshal(task, p);
return ops->crmarshal(task, xdr);
}
__be32 *
rpcauth_checkverf(struct rpc_task *task, __be32 *p)
/**
* rpcauth_wrap_req_encode - XDR encode the RPC procedure
* @task: controlling RPC task
* @xdr: stream where on-the-wire bytes are to be marshalled
*
* On success, @xdr contains the encoded and wrapped message.
* Otherwise, @xdr is in an undefined state.
*/
int rpcauth_wrap_req_encode(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
kxdreproc_t encode = task->tk_msg.rpc_proc->p_encode;
dprintk("RPC: %5u validating %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
return cred->cr_ops->crvalidate(task, p);
}
static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
__be32 *data, void *obj)
{
struct xdr_stream xdr;
xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
encode(rqstp, &xdr, obj);
}
int
rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
__be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
if (cred->cr_ops->crwrap_req)
return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
/* By default, we encode the arguments normally. */
rpcauth_wrap_req_encode(encode, rqstp, data, obj);
encode(task->tk_rqstp, xdr, task->tk_msg.rpc_argp);
return 0;
}
EXPORT_SYMBOL_GPL(rpcauth_wrap_req_encode);
static int
rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
__be32 *data, void *obj)
/**
* rpcauth_wrap_req - XDR encode and wrap the RPC procedure
* @task: controlling RPC task
* @xdr: stream where on-the-wire bytes are to be marshalled
*
* On success, @xdr contains the encoded and wrapped message,
* and zero is returned. Otherwise, @xdr is in an undefined
* state and a negative errno is returned.
*/
int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
{
struct xdr_stream xdr;
const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
return decode(rqstp, &xdr, obj);
return ops->crwrap_req(task, xdr);
}
/**
* rpcauth_checkverf - Validate verifier in RPC Reply header
* @task: controlling RPC task
* @xdr: xdr_stream containing RPC Reply header
*
* On success, @xdr is updated to point past the verifier and
* zero is returned. Otherwise, @xdr is in an undefined state
* and a negative errno is returned.
*/
int
rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
__be32 *data, void *obj)
rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
if (cred->cr_ops->crunwrap_resp)
return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
data, obj);
/* By default, we decode the arguments normally. */
return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
return ops->crvalidate(task, xdr);
}
/**
 * rpcauth_unwrap_resp_decode - Run the procedure's XDR decode method
 * @task: controlling RPC task
 * @xdr: stream holding the Reply message
 *
 * Calls the p_decode method of @task's RPC procedure with @task's
 * request, @xdr and the rpc_resp pointer from @task's message, and
 * returns whatever that method returns.
 */
int
rpcauth_unwrap_resp_decode(struct rpc_task *task, struct xdr_stream *xdr)
{
	return task->tk_msg.rpc_proc->p_decode(task->tk_rqstp, xdr,
					       task->tk_msg.rpc_resp);
}
EXPORT_SYMBOL_GPL(rpcauth_unwrap_resp_decode);
/**
 * rpcauth_unwrap_resp - Hand the Reply to the credential's unwrap method
 * @task: controlling RPC task
 * @xdr: stream holding the Reply message
 *
 * Dispatches to the crunwrap_resp operation of the credential attached
 * to @task's request and returns that operation's result.
 */
int
rpcauth_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
{
	return task->tk_rqstp->rq_cred->cr_ops->crunwrap_resp(task, xdr);
}
bool
@ -865,8 +879,6 @@ rpcauth_refreshcred(struct rpc_task *task)
goto out;
cred = task->tk_rqstp->rq_cred;
}
dprintk("RPC: %5u refreshing %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
err = cred->cr_ops->crrefresh(task);
out:
@ -880,8 +892,6 @@ rpcauth_invalcred(struct rpc_task *task)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u invalidating %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
if (cred)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
}

View File

@ -7,7 +7,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
auth_rpcgss-y := auth_gss.o gss_generic_token.o \
gss_mech_switch.o svcauth_gss.o \
gss_rpc_upcall.o gss_rpc_xdr.o
gss_rpc_upcall.o gss_rpc_xdr.o trace.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: BSD-3-Clause
/*
* linux/net/sunrpc/auth_gss/auth_gss.c
*
@ -8,34 +9,8 @@
*
* Dug Song <dugsong@monkey.org>
* Andy Adamson <andros@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
@ -55,6 +30,8 @@
#include "../netns.h"
#include <trace/events/rpcgss.h>
static const struct rpc_authops authgss_ops;
static const struct rpc_credops gss_credops;
@ -260,6 +237,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
}
ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
if (ret < 0) {
trace_rpcgss_import_ctx(ret);
p = ERR_PTR(ret);
goto err;
}
@ -275,12 +253,9 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
if (IS_ERR(p))
goto err;
done:
dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
__func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
ctx->gc_acceptor.data);
return p;
trace_rpcgss_context(ctx->gc_expiry, now, timeout,
ctx->gc_acceptor.len, ctx->gc_acceptor.data);
err:
dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p));
return p;
}
@ -354,10 +329,8 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
if (auth && pos->auth->service != auth->service)
continue;
refcount_inc(&pos->count);
dprintk("RPC: %s found msg %p\n", __func__, pos);
return pos;
}
dprintk("RPC: %s found nothing\n", __func__);
return NULL;
}
@ -456,7 +429,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
size_t buflen = sizeof(gss_msg->databuf);
int len;
len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name,
len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
from_kuid(&init_user_ns, gss_msg->uid));
buflen -= len;
p += len;
@ -467,7 +440,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
* identity that we are authenticating to.
*/
if (target_name) {
len = scnprintf(p, buflen, "target=%s ", target_name);
len = scnprintf(p, buflen, " target=%s", target_name);
buflen -= len;
p += len;
gss_msg->msg.len += len;
@ -487,11 +460,11 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
char *c = strchr(service_name, '@');
if (!c)
len = scnprintf(p, buflen, "service=%s ",
len = scnprintf(p, buflen, " service=%s",
service_name);
else
len = scnprintf(p, buflen,
"service=%.*s srchost=%s ",
" service=%.*s srchost=%s",
(int)(c - service_name),
service_name, c + 1);
buflen -= len;
@ -500,17 +473,17 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
}
if (mech->gm_upcall_enctypes) {
len = scnprintf(p, buflen, "enctypes=%s ",
len = scnprintf(p, buflen, " enctypes=%s",
mech->gm_upcall_enctypes);
buflen -= len;
p += len;
gss_msg->msg.len += len;
}
trace_rpcgss_upcall_msg(gss_msg->databuf);
len = scnprintf(p, buflen, "\n");
if (len == 0)
goto out_overflow;
gss_msg->msg.len += len;
gss_msg->msg.data = gss_msg->databuf;
return 0;
out_overflow:
@ -603,8 +576,6 @@ gss_refresh_upcall(struct rpc_task *task)
struct rpc_pipe *pipe;
int err = 0;
dprintk("RPC: %5u %s for uid %u\n",
task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* XXX: warning on the first, under the assumption we
@ -612,7 +583,8 @@ gss_refresh_upcall(struct rpc_task *task)
warn_gssd();
task->tk_timeout = 15*HZ;
rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
return -EAGAIN;
err = -EAGAIN;
goto out;
}
if (IS_ERR(gss_msg)) {
err = PTR_ERR(gss_msg);
@ -635,9 +607,8 @@ gss_refresh_upcall(struct rpc_task *task)
spin_unlock(&pipe->lock);
gss_release_msg(gss_msg);
out:
dprintk("RPC: %5u %s for uid %u result %d\n",
task->tk_pid, __func__,
from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
cred->cr_cred->fsuid), err);
return err;
}
@ -652,14 +623,13 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
DEFINE_WAIT(wait);
int err;
dprintk("RPC: %s for uid %u\n",
__func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
retry:
err = 0;
/* if gssd is down, just skip upcalling altogether */
if (!gssd_running(net)) {
warn_gssd();
return -EACCES;
err = -EACCES;
goto out;
}
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
@ -700,8 +670,8 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
finish_wait(&gss_msg->waitqueue, &wait);
gss_release_msg(gss_msg);
out:
dprintk("RPC: %s for uid %u result %d\n",
__func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
cred->cr_cred->fsuid), err);
return err;
}
@ -794,7 +764,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
err:
kfree(buf);
out:
dprintk("RPC: %s returning %zd\n", __func__, err);
return err;
}
@ -863,8 +832,6 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
if (msg->errno < 0) {
dprintk("RPC: %s releasing msg %p\n",
__func__, gss_msg);
refcount_inc(&gss_msg->count);
gss_unhash_msg(gss_msg);
if (msg->errno == -ETIMEDOUT)
@ -1024,8 +991,6 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
struct rpc_auth * auth;
int err = -ENOMEM; /* XXX? */
dprintk("RPC: creating GSS authenticator for client %p\n", clnt);
if (!try_module_get(THIS_MODULE))
return ERR_PTR(err);
if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
@ -1041,10 +1006,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
gss_auth->net = get_net(rpc_net_ns(clnt));
err = -EINVAL;
gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
if (!gss_auth->mech) {
dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
if (!gss_auth->mech)
goto err_put_net;
}
gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
if (gss_auth->service == 0)
goto err_put_mech;
@ -1053,6 +1016,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
auth = &gss_auth->rpc_auth;
auth->au_cslack = GSS_CRED_SLACK >> 2;
auth->au_rslack = GSS_VERF_SLACK >> 2;
auth->au_verfsize = GSS_VERF_SLACK >> 2;
auth->au_ralign = GSS_VERF_SLACK >> 2;
auth->au_flags = 0;
auth->au_ops = &authgss_ops;
auth->au_flavor = flavor;
@ -1099,6 +1064,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
kfree(gss_auth);
out_dec:
module_put(THIS_MODULE);
trace_rpcgss_createauth(flavor, err);
return ERR_PTR(err);
}
@ -1135,9 +1101,6 @@ gss_destroy(struct rpc_auth *auth)
struct gss_auth *gss_auth = container_of(auth,
struct gss_auth, rpc_auth);
dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
auth, auth->au_flavor);
if (hash_hashed(&gss_auth->hash)) {
spin_lock(&gss_auth_hash_lock);
hash_del(&gss_auth->hash);
@ -1245,7 +1208,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
struct gss_cred *new;
/* Make a copy of the cred so that we can reference count it */
new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
new = kzalloc(sizeof(*gss_cred), GFP_NOFS);
if (new) {
struct auth_cred acred = {
.cred = gss_cred->gc_base.cr_cred,
@ -1300,8 +1263,6 @@ gss_send_destroy_context(struct rpc_cred *cred)
static void
gss_do_free_ctx(struct gss_cl_ctx *ctx)
{
dprintk("RPC: %s\n", __func__);
gss_delete_sec_context(&ctx->gc_gss_ctx);
kfree(ctx->gc_wire_ctx.data);
kfree(ctx->gc_acceptor.data);
@ -1324,7 +1285,6 @@ gss_free_ctx(struct gss_cl_ctx *ctx)
static void
gss_free_cred(struct gss_cred *gss_cred)
{
dprintk("RPC: %s cred=%p\n", __func__, gss_cred);
kfree(gss_cred);
}
@ -1381,10 +1341,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
struct gss_cred *cred = NULL;
int err = -ENOMEM;
dprintk("RPC: %s for uid %d, flavor %d\n",
__func__, from_kuid(&init_user_ns, acred->cred->fsuid),
auth->au_flavor);
if (!(cred = kzalloc(sizeof(*cred), gfp)))
goto out_err;
@ -1400,7 +1356,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
return &cred->gc_base;
out_err:
dprintk("RPC: %s failed with error %d\n", __func__, err);
return ERR_PTR(err);
}
@ -1526,69 +1481,84 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
}
/*
* Marshal credentials.
* Maybe we should keep a cached credential for performance reasons.
*/
static __be32 *
gss_marshal(struct rpc_task *task, __be32 *p)
* Marshal credentials.
*
* The expensive part is computing the verifier. We can't cache a
* pre-computed version of the verifier because the seqno, which
* is different every time, is included in the MIC.
*/
static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_cred *cred = req->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
__be32 *cred_len;
__be32 *p, *cred_len;
u32 maj_stat = 0;
struct xdr_netobj mic;
struct kvec iov;
struct xdr_buf verf_buf;
int status;
dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
/* Credential */
*p++ = htonl(RPC_AUTH_GSS);
p = xdr_reserve_space(xdr, 7 * sizeof(*p) +
ctx->gc_wire_ctx.len);
if (!p)
goto marshal_failed;
*p++ = rpc_auth_gss;
cred_len = p++;
spin_lock(&ctx->gc_seq_lock);
req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ;
spin_unlock(&ctx->gc_seq_lock);
if (req->rq_seqno == MAXSEQ)
goto out_expired;
goto expired;
trace_rpcgss_seqno(task);
*p++ = htonl((u32) RPC_GSS_VERSION);
*p++ = htonl((u32) ctx->gc_proc);
*p++ = htonl((u32) req->rq_seqno);
*p++ = htonl((u32) gss_cred->gc_service);
*p++ = cpu_to_be32(RPC_GSS_VERSION);
*p++ = cpu_to_be32(ctx->gc_proc);
*p++ = cpu_to_be32(req->rq_seqno);
*p++ = cpu_to_be32(gss_cred->gc_service);
p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
*cred_len = htonl((p - (cred_len + 1)) << 2);
*cred_len = cpu_to_be32((p - (cred_len + 1)) << 2);
/* Verifier */
/* We compute the checksum for the verifier over the xdr-encoded bytes
* starting with the xid and ending at the end of the credential: */
iov.iov_base = xprt_skip_transport_header(req->rq_xprt,
req->rq_snd_buf.head[0].iov_base);
iov.iov_base = req->rq_snd_buf.head[0].iov_base;
iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
xdr_buf_from_iov(&iov, &verf_buf);
/* set verifier flavor*/
*p++ = htonl(RPC_AUTH_GSS);
p = xdr_reserve_space(xdr, sizeof(*p));
if (!p)
goto marshal_failed;
*p++ = rpc_auth_gss;
mic.data = (u8 *)(p + 1);
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
goto out_expired;
} else if (maj_stat != 0) {
pr_warn("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
task->tk_status = -EIO;
goto out_put_ctx;
}
p = xdr_encode_opaque(p, NULL, mic.len);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
goto expired;
else if (maj_stat != 0)
goto bad_mic;
if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
goto marshal_failed;
status = 0;
out:
gss_put_ctx(ctx);
return p;
out_expired:
return status;
expired:
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
task->tk_status = -EKEYEXPIRED;
out_put_ctx:
gss_put_ctx(ctx);
return NULL;
status = -EKEYEXPIRED;
goto out;
marshal_failed:
status = -EMSGSIZE;
goto out;
bad_mic:
trace_rpcgss_get_mic(task, maj_stat);
status = -EIO;
goto out;
}
static int gss_renew_cred(struct rpc_task *task)
@ -1662,116 +1632,105 @@ gss_refresh_null(struct rpc_task *task)
return 0;
}
static __be32 *
gss_validate(struct rpc_task *task, __be32 *p)
static int
gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
__be32 *seq = NULL;
__be32 *p, *seq = NULL;
struct kvec iov;
struct xdr_buf verf_buf;
struct xdr_netobj mic;
u32 flav,len;
u32 maj_stat;
__be32 *ret = ERR_PTR(-EIO);
u32 len, maj_stat;
int status;
dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (!p)
goto validate_failed;
if (*p++ != rpc_auth_gss)
goto validate_failed;
len = be32_to_cpup(p);
if (len > RPC_MAX_AUTH_SIZE)
goto validate_failed;
p = xdr_inline_decode(xdr, len);
if (!p)
goto validate_failed;
flav = ntohl(*p++);
if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
goto out_bad;
if (flav != RPC_AUTH_GSS)
goto out_bad;
seq = kmalloc(4, GFP_NOFS);
if (!seq)
goto out_bad;
*seq = htonl(task->tk_rqstp->rq_seqno);
goto validate_failed;
*seq = cpu_to_be32(task->tk_rqstp->rq_seqno);
iov.iov_base = seq;
iov.iov_len = 4;
xdr_buf_from_iov(&iov, &verf_buf);
mic.data = (u8 *)p;
mic.len = len;
ret = ERR_PTR(-EACCES);
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat) {
dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
task->tk_pid, __func__, maj_stat);
goto out_bad;
}
if (maj_stat)
goto bad_mic;
/* We leave it to unwrap to calculate au_rslack. For now we just
* calculate the length of the verifier: */
cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
status = 0;
out:
gss_put_ctx(ctx);
dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
task->tk_pid, __func__);
kfree(seq);
return p + XDR_QUADLEN(len);
out_bad:
gss_put_ctx(ctx);
dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
PTR_ERR(ret));
kfree(seq);
return ret;
return status;
validate_failed:
status = -EIO;
goto out;
bad_mic:
trace_rpcgss_verify_mic(task, maj_stat);
status = -EACCES;
goto out;
}
static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
__be32 *p, void *obj)
static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
struct rpc_task *task, struct xdr_stream *xdr)
{
struct xdr_stream xdr;
xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
encode(rqstp, &xdr, obj);
}
static inline int
gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
kxdreproc_t encode, struct rpc_rqst *rqstp,
__be32 *p, void *obj)
{
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
struct xdr_buf integ_buf;
__be32 *integ_len = NULL;
struct rpc_rqst *rqstp = task->tk_rqstp;
struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf;
struct xdr_netobj mic;
u32 offset;
__be32 *q;
struct kvec *iov;
u32 maj_stat = 0;
int status = -EIO;
__be32 *p, *integ_len;
u32 offset, maj_stat;
p = xdr_reserve_space(xdr, 2 * sizeof(*p));
if (!p)
goto wrap_failed;
integ_len = p++;
*p = cpu_to_be32(rqstp->rq_seqno);
if (rpcauth_wrap_req_encode(task, xdr))
goto wrap_failed;
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(rqstp->rq_seqno);
gss_wrap_req_encode(encode, rqstp, p, obj);
if (xdr_buf_subsegment(snd_buf, &integ_buf,
offset, snd_buf->len - offset))
return status;
*integ_len = htonl(integ_buf.len);
goto wrap_failed;
*integ_len = cpu_to_be32(integ_buf.len);
/* guess whether we're in the head or the tail: */
if (snd_buf->page_len || snd_buf->tail[0].iov_len)
iov = snd_buf->tail;
else
iov = snd_buf->head;
p = iov->iov_base + iov->iov_len;
p = xdr_reserve_space(xdr, 0);
if (!p)
goto wrap_failed;
mic.data = (u8 *)(p + 1);
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
status = -EIO; /* XXX? */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
return status;
q = xdr_encode_opaque(p, NULL, mic.len);
offset = (u8 *)q - (u8 *)p;
iov->iov_len += offset;
snd_buf->len += offset;
goto bad_mic;
/* Check that the trailing MIC fit in the buffer, after the fact */
if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
goto wrap_failed;
return 0;
wrap_failed:
return -EMSGSIZE;
bad_mic:
trace_rpcgss_get_mic(task, maj_stat);
return -EIO;
}
static void
@ -1822,61 +1781,62 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
return -EAGAIN;
}
static inline int
gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
kxdreproc_t encode, struct rpc_rqst *rqstp,
__be32 *p, void *obj)
static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_rqst *rqstp = task->tk_rqstp;
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
u32 offset;
u32 maj_stat;
u32 pad, offset, maj_stat;
int status;
__be32 *opaque_len;
__be32 *p, *opaque_len;
struct page **inpages;
int first;
int pad;
struct kvec *iov;
char *tmp;
status = -EIO;
p = xdr_reserve_space(xdr, 2 * sizeof(*p));
if (!p)
goto wrap_failed;
opaque_len = p++;
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(rqstp->rq_seqno);
*p = cpu_to_be32(rqstp->rq_seqno);
gss_wrap_req_encode(encode, rqstp, p, obj);
if (rpcauth_wrap_req_encode(task, xdr))
goto wrap_failed;
status = alloc_enc_pages(rqstp);
if (status)
return status;
if (unlikely(status))
goto wrap_failed;
first = snd_buf->page_base >> PAGE_SHIFT;
inpages = snd_buf->pages + first;
snd_buf->pages = rqstp->rq_enc_pages;
snd_buf->page_base -= first << PAGE_SHIFT;
/*
* Give the tail its own page, in case we need extra space in the
* head when wrapping:
* Move the tail into its own page, in case gss_wrap needs
* more space in the head when wrapping.
*
* call_allocate() allocates twice the slack space required
* by the authentication flavor to rq_callsize.
* For GSS, slack is GSS_CRED_SLACK.
* Still... Why can't gss_wrap just slide the tail down?
*/
if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
char *tmp;
tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
snd_buf->tail[0].iov_base = tmp;
}
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
/* slack space should prevent this ever happening: */
BUG_ON(snd_buf->len > snd_buf->buflen);
status = -EIO;
if (unlikely(snd_buf->len > snd_buf->buflen))
goto wrap_failed;
/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
* done anyway, so it's safe to put the request on the wire: */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
return status;
goto bad_wrap;
*opaque_len = htonl(snd_buf->len - offset);
/* guess whether we're in the head or the tail: */
*opaque_len = cpu_to_be32(snd_buf->len - offset);
/* guess whether the pad goes into the head or the tail: */
if (snd_buf->page_len || snd_buf->tail[0].iov_len)
iov = snd_buf->tail;
else
@ -1888,118 +1848,154 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
snd_buf->len += pad;
return 0;
wrap_failed:
return status;
bad_wrap:
trace_rpcgss_wrap(task, maj_stat);
return -EIO;
}
static int
gss_wrap_req(struct rpc_task *task,
kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
int status = -EIO;
int status;
dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
status = -EIO;
if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
/* The spec seems a little ambiguous here, but I think that not
* wrapping context destruction requests makes the most sense.
*/
gss_wrap_req_encode(encode, rqstp, p, obj);
status = 0;
status = rpcauth_wrap_req_encode(task, xdr);
goto out;
}
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
gss_wrap_req_encode(encode, rqstp, p, obj);
status = 0;
status = rpcauth_wrap_req_encode(task, xdr);
break;
case RPC_GSS_SVC_INTEGRITY:
status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj);
status = gss_wrap_req_integ(cred, ctx, task, xdr);
break;
case RPC_GSS_SVC_PRIVACY:
status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj);
status = gss_wrap_req_priv(cred, ctx, task, xdr);
break;
default:
status = -EIO;
}
out:
gss_put_ctx(ctx);
dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
return status;
}
static inline int
gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
struct rpc_rqst *rqstp, __be32 **p)
static int
gss_unwrap_resp_auth(struct rpc_cred *cred)
{
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct xdr_buf integ_buf;
struct xdr_netobj mic;
u32 data_offset, mic_offset;
u32 integ_len;
u32 maj_stat;
int status = -EIO;
struct rpc_auth *auth = cred->cr_auth;
integ_len = ntohl(*(*p)++);
auth->au_rslack = auth->au_verfsize;
auth->au_ralign = auth->au_verfsize;
return 0;
}
static int
gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
struct xdr_stream *xdr)
{
struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
u32 data_offset, mic_offset, integ_len, maj_stat;
struct rpc_auth *auth = cred->cr_auth;
struct xdr_netobj mic;
__be32 *p;
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (unlikely(!p))
goto unwrap_failed;
integ_len = be32_to_cpup(p++);
if (integ_len & 3)
return status;
data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
goto unwrap_failed;
data_offset = (u8 *)(p) - (u8 *)rcv_buf->head[0].iov_base;
mic_offset = integ_len + data_offset;
if (mic_offset > rcv_buf->len)
return status;
if (ntohl(*(*p)++) != rqstp->rq_seqno)
return status;
if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
mic_offset - data_offset))
return status;
goto unwrap_failed;
if (be32_to_cpup(p) != rqstp->rq_seqno)
goto bad_seqno;
if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
goto unwrap_failed;
if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
return status;
goto unwrap_failed;
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
return status;
goto bad_mic;
auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len);
auth->au_ralign = auth->au_verfsize + 2;
return 0;
unwrap_failed:
trace_rpcgss_unwrap_failed(task);
return -EIO;
bad_seqno:
trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(p));
return -EIO;
bad_mic:
trace_rpcgss_verify_mic(task, maj_stat);
return -EIO;
}
static inline int
gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
struct rpc_rqst *rqstp, __be32 **p)
static int
gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
struct xdr_stream *xdr)
{
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
u32 offset;
u32 opaque_len;
u32 maj_stat;
int status = -EIO;
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct kvec *head = rqstp->rq_rcv_buf.head;
struct rpc_auth *auth = cred->cr_auth;
unsigned int savedlen = rcv_buf->len;
u32 offset, opaque_len, maj_stat;
__be32 *p;
opaque_len = ntohl(*(*p)++);
offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (unlikely(!p))
goto unwrap_failed;
opaque_len = be32_to_cpup(p++);
offset = (u8 *)(p) - (u8 *)head->iov_base;
if (offset + opaque_len > rcv_buf->len)
return status;
/* remove padding: */
goto unwrap_failed;
rcv_buf->len = offset + opaque_len;
maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
return status;
if (ntohl(*(*p)++) != rqstp->rq_seqno)
return status;
goto bad_unwrap;
/* gss_unwrap decrypted the sequence number */
if (be32_to_cpup(p++) != rqstp->rq_seqno)
goto bad_seqno;
/* gss_unwrap redacts the opaque blob from the head iovec.
* rcv_buf has changed, thus the stream needs to be reset.
*/
xdr_init_decode(xdr, rcv_buf, p, rqstp);
auth->au_rslack = auth->au_verfsize + 2 +
XDR_QUADLEN(savedlen - rcv_buf->len);
auth->au_ralign = auth->au_verfsize + 2 +
XDR_QUADLEN(savedlen - rcv_buf->len);
return 0;
}
static int
gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
__be32 *p, void *obj)
{
struct xdr_stream xdr;
xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
return decode(rqstp, &xdr, obj);
unwrap_failed:
trace_rpcgss_unwrap_failed(task);
return -EIO;
bad_seqno:
trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p));
return -EIO;
bad_unwrap:
trace_rpcgss_unwrap(task, maj_stat);
return -EIO;
}
static bool
@ -2014,14 +2010,14 @@ gss_xmit_need_reencode(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_cred *cred = req->rq_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
u32 win, seq_xmit;
u32 win, seq_xmit = 0;
bool ret = true;
if (!ctx)
return true;
goto out;
if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
goto out;
goto out_ctx;
seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
@ -2030,56 +2026,51 @@ gss_xmit_need_reencode(struct rpc_task *task)
seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
if (seq_xmit == tmp) {
ret = false;
goto out;
goto out_ctx;
}
}
win = ctx->gc_win;
if (win > 0)
ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
out:
out_ctx:
gss_put_ctx(ctx);
out:
trace_rpcgss_need_reencode(task, seq_xmit, ret);
return ret;
}
static int
gss_unwrap_resp(struct rpc_task *task,
kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct rpc_rqst *rqstp = task->tk_rqstp;
struct rpc_cred *cred = rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
__be32 *savedp = p;
struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
int savedlen = head->iov_len;
int status = -EIO;
int status = -EIO;
if (ctx->gc_proc != RPC_GSS_PROC_DATA)
goto out_decode;
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
status = gss_unwrap_resp_auth(cred);
break;
case RPC_GSS_SVC_INTEGRITY:
status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
if (status)
goto out;
status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr);
break;
case RPC_GSS_SVC_PRIVACY:
status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
if (status)
goto out;
status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr);
break;
}
/* take into account extra slack for integrity and privacy cases: */
cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
+ (savedlen - head->iov_len);
if (status)
goto out;
out_decode:
status = gss_unwrap_req_decode(decode, rqstp, p, obj);
status = rpcauth_unwrap_resp_decode(task, xdr);
out:
gss_put_ctx(ctx);
dprintk("RPC: %5u %s returning %d\n",
task->tk_pid, __func__, status);
return status;
}

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: BSD-3-Clause
/*
* linux/net/sunrpc/gss_krb5_mech.c
*
@ -6,32 +7,6 @@
*
* Andy Adamson <andros@umich.edu>
* J. Bruce Fields <bfields@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <crypto/hash.h>
@ -53,6 +28,7 @@
static struct gss_api_mech gss_kerberos_mech; /* forward declaration */
static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
/*
* DES (All DES enctypes are mapped to the same gss functionality)
*/
@ -74,6 +50,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.cksumlength = 8,
.keyed_cksum = 0,
},
#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
/*
* RC4-HMAC
*/

View File

@ -570,14 +570,16 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
*/
movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
buf->head[0].iov_len);
if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
buf->head[0].iov_len)
return GSS_S_FAILURE;
memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
/* Trim off the trailing "extra count" and checksum blob */
xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
return GSS_S_COMPLETE;
}

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: BSD-3-Clause
/*
* linux/net/sunrpc/gss_mech_switch.c
*
@ -5,32 +6,6 @@
* All rights reserved.
*
* J. Bruce Fields <bfields@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/types.h>

View File

@ -1,21 +1,8 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* linux/net/sunrpc/gss_rpc_upcall.c
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>

View File

@ -1,21 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* linux/net/sunrpc/gss_rpc_upcall.h
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _GSS_RPC_UPCALL_H
@ -45,4 +32,5 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data);
void init_gssp_clnt(struct sunrpc_net *);
int set_gssp_clnt(struct net *);
void clear_gssp_clnt(struct sunrpc_net *);
#endif /* _GSS_RPC_UPCALL_H */

View File

@ -1,21 +1,8 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/sunrpc/svcauth.h>

View File

@ -1,21 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _LINUX_GSS_RPC_XDR_H
@ -262,6 +249,4 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
#define GSSX_ARG_wrap_size_limit_sz 0
#define GSSX_RES_wrap_size_limit_sz 0
#endif /* _LINUX_GSS_RPC_XDR_H */

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Neil Brown <neilb@cse.unsw.edu.au>
* J. Bruce Fields <bfields@umich.edu>
@ -896,7 +897,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
if (svc_getnl(&buf->head[0]) != seq)
goto out;
/* trim off the mic and padding at the end before returning */
xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
buf->len -= 4 + round_up_to_quad(mic.len);
stat = 0;
out:
kfree(mic.data);

View File

@ -0,0 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2018, 2019 Oracle. All rights reserved.
*/
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/gss_err.h>
#define CREATE_TRACE_POINTS
#include <trace/events/rpcgss.h>

View File

@ -59,15 +59,21 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
/*
* Marshal credential.
*/
static __be32 *
nul_marshal(struct rpc_task *task, __be32 *p)
static int
nul_marshal(struct rpc_task *task, struct xdr_stream *xdr)
{
*p++ = htonl(RPC_AUTH_NULL);
*p++ = 0;
*p++ = htonl(RPC_AUTH_NULL);
*p++ = 0;
__be32 *p;
return p;
p = xdr_reserve_space(xdr, 4 * sizeof(*p));
if (!p)
return -EMSGSIZE;
/* Credential */
*p++ = rpc_auth_null;
*p++ = xdr_zero;
/* Verifier */
*p++ = rpc_auth_null;
*p = xdr_zero;
return 0;
}
/*
@ -80,25 +86,19 @@ nul_refresh(struct rpc_task *task)
return 0;
}
static __be32 *
nul_validate(struct rpc_task *task, __be32 *p)
static int
nul_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
rpc_authflavor_t flavor;
u32 size;
__be32 *p;
flavor = ntohl(*p++);
if (flavor != RPC_AUTH_NULL) {
printk("RPC: bad verf flavor: %u\n", flavor);
return ERR_PTR(-EIO);
}
size = ntohl(*p++);
if (size != 0) {
printk("RPC: bad verf size: %u\n", size);
return ERR_PTR(-EIO);
}
return p;
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (!p)
return -EIO;
if (*p++ != rpc_auth_null)
return -EIO;
if (*p != xdr_zero)
return -EIO;
return 0;
}
const struct rpc_authops authnull_ops = {
@ -114,6 +114,8 @@ static
struct rpc_auth null_auth = {
.au_cslack = NUL_CALLSLACK,
.au_rslack = NUL_REPLYSLACK,
.au_verfsize = NUL_REPLYSLACK,
.au_ralign = NUL_REPLYSLACK,
.au_ops = &authnull_ops,
.au_flavor = RPC_AUTH_NULL,
.au_count = REFCOUNT_INIT(1),
@ -125,8 +127,10 @@ const struct rpc_credops null_credops = {
.crdestroy = nul_destroy_cred,
.crmatch = nul_match,
.crmarshal = nul_marshal,
.crwrap_req = rpcauth_wrap_req_encode,
.crrefresh = nul_refresh,
.crvalidate = nul_validate,
.crunwrap_resp = rpcauth_unwrap_resp_decode,
};
static

View File

@ -28,8 +28,6 @@ static mempool_t *unix_pool;
static struct rpc_auth *
unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
dprintk("RPC: creating UNIX authenticator for client %p\n",
clnt);
refcount_inc(&unix_auth.au_count);
return &unix_auth;
}
@ -37,7 +35,6 @@ unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
/*
 * Destroy hook for the UNIX authenticator.
 *
 * Nothing is released here: the body only emits a debug message naming
 * the @auth pointer it was handed.
 */
static void
unx_destroy(struct rpc_auth *auth)
{
dprintk("RPC: destroying UNIX authenticator %p\n", auth);
}
/*
@ -48,10 +45,6 @@ unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);
dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
from_kuid(&init_user_ns, acred->cred->fsuid),
from_kgid(&init_user_ns, acred->cred->fsgid));
rpcauth_init_cred(ret, acred, auth, &unix_credops);
ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
return ret;
@ -61,7 +54,7 @@ static void
unx_free_cred_callback(struct rcu_head *head)
{
struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu);
dprintk("RPC: unx_free_cred %p\n", rpc_cred);
put_cred(rpc_cred->cr_cred);
mempool_free(rpc_cred, unix_pool);
}
@ -87,7 +80,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid))
return 0;
if (acred->cred && acred->cred->group_info != NULL)
if (acred->cred->group_info != NULL)
groups = acred->cred->group_info->ngroups;
if (groups > UNX_NGROUPS)
groups = UNX_NGROUPS;
@ -106,37 +99,55 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
* Marshal credentials.
* Maybe we should keep a cached credential for performance reasons.
*/
static __be32 *
unx_marshal(struct rpc_task *task, __be32 *p)
static int
unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
__be32 *base, *hold;
__be32 *p, *cred_len, *gidarr_len;
int i;
struct group_info *gi = cred->cr_cred->group_info;
*p++ = htonl(RPC_AUTH_UNIX);
base = p++;
*p++ = htonl(jiffies/HZ);
/* Credential */
/*
* Copy the UTS nodename captured when the client was created.
*/
p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
p = xdr_reserve_space(xdr, 3 * sizeof(*p));
if (!p)
goto marshal_failed;
*p++ = rpc_auth_unix;
cred_len = p++;
*p++ = xdr_zero; /* stamp */
if (xdr_stream_encode_opaque(xdr, clnt->cl_nodename,
clnt->cl_nodelen) < 0)
goto marshal_failed;
p = xdr_reserve_space(xdr, 3 * sizeof(*p));
if (!p)
goto marshal_failed;
*p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid));
*p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid));
*p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid));
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid));
hold = p++;
gidarr_len = p++;
if (gi)
for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
*p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i]));
*hold = htonl(p - hold - 1); /* gid array length */
*base = htonl((p - base - 1) << 2); /* cred length */
*p++ = cpu_to_be32(from_kgid(&init_user_ns,
gi->gid[i]));
*gidarr_len = cpu_to_be32(p - gidarr_len - 1);
*cred_len = cpu_to_be32((p - cred_len - 1) << 2);
p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
if (!p)
goto marshal_failed;
*p++ = htonl(RPC_AUTH_NULL);
*p++ = htonl(0);
/* Verifier */
return p;
p = xdr_reserve_space(xdr, 2 * sizeof(*p));
if (!p)
goto marshal_failed;
*p++ = rpc_auth_null;
*p = xdr_zero;
return 0;
marshal_failed:
return -EMSGSIZE;
}
/*
@ -149,29 +160,35 @@ unx_refresh(struct rpc_task *task)
return 0;
}
static __be32 *
unx_validate(struct rpc_task *task, __be32 *p)
static int
unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
rpc_authflavor_t flavor;
u32 size;
struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
__be32 *p;
u32 size;
flavor = ntohl(*p++);
if (flavor != RPC_AUTH_NULL &&
flavor != RPC_AUTH_UNIX &&
flavor != RPC_AUTH_SHORT) {
printk("RPC: bad verf flavor: %u\n", flavor);
return ERR_PTR(-EIO);
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (!p)
return -EIO;
switch (*p++) {
case rpc_auth_null:
case rpc_auth_unix:
case rpc_auth_short:
break;
default:
return -EIO;
}
size = be32_to_cpup(p);
if (size > RPC_MAX_AUTH_SIZE)
return -EIO;
p = xdr_inline_decode(xdr, size);
if (!p)
return -EIO;
size = ntohl(*p++);
if (size > RPC_MAX_AUTH_SIZE) {
printk("RPC: giant verf size: %u\n", size);
return ERR_PTR(-EIO);
}
task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
p += (size >> 2);
return p;
auth->au_verfsize = XDR_QUADLEN(size) + 2;
auth->au_rslack = XDR_QUADLEN(size) + 2;
auth->au_ralign = XDR_QUADLEN(size) + 2;
return 0;
}
int __init rpc_init_authunix(void)
@ -198,6 +215,7 @@ static
struct rpc_auth unix_auth = {
.au_cslack = UNX_CALLSLACK,
.au_rslack = NUL_REPLYSLACK,
.au_verfsize = NUL_REPLYSLACK,
.au_ops = &authunix_ops,
.au_flavor = RPC_AUTH_UNIX,
.au_count = REFCOUNT_INIT(1),
@ -209,6 +227,8 @@ const struct rpc_credops unix_credops = {
.crdestroy = unx_destroy_cred,
.crmatch = unx_match,
.crmarshal = unx_marshal,
.crwrap_req = rpcauth_wrap_req_encode,
.crrefresh = unx_refresh,
.crvalidate = unx_validate,
.crunwrap_resp = rpcauth_unwrap_resp_decode,
};

View File

@ -235,7 +235,8 @@ void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs)
list_empty(&xprt->bc_pa_list) ? "true" : "false");
}
static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid,
struct rpc_rqst *new)
{
struct rpc_rqst *req = NULL;
@ -243,22 +244,20 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
if (atomic_read(&xprt->bc_free_slots) <= 0)
goto not_found;
if (list_empty(&xprt->bc_pa_list)) {
req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
if (!req)
if (!new)
goto not_found;
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
}
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
req->rq_reply_bytes_recvd = 0;
req->rq_bytes_sent = 0;
memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(req->rq_private_buf));
req->rq_xid = xid;
req->rq_connect_cookie = xprt->connect_cookie;
not_found:
dprintk("RPC: backchannel req=%p\n", req);
not_found:
return req;
}
@ -321,18 +320,27 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
*/
struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
{
struct rpc_rqst *req;
struct rpc_rqst *req, *new = NULL;
spin_lock(&xprt->bc_pa_lock);
list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
if (req->rq_connect_cookie != xprt->connect_cookie)
continue;
if (req->rq_xid == xid)
goto found;
}
req = xprt_alloc_bc_request(xprt, xid);
do {
spin_lock(&xprt->bc_pa_lock);
list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
if (req->rq_connect_cookie != xprt->connect_cookie)
continue;
if (req->rq_xid == xid)
goto found;
}
req = xprt_get_bc_request(xprt, xid, new);
found:
spin_unlock(&xprt->bc_pa_lock);
spin_unlock(&xprt->bc_pa_lock);
if (new) {
if (req != new)
xprt_free_bc_rqst(new);
break;
} else if (req)
break;
new = xprt_alloc_bc_req(xprt, GFP_KERNEL);
} while (new);
return req;
}

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,7 @@
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/freezer.h>
#include <linux/sched/mm.h>
#include <linux/sunrpc/clnt.h>
@ -784,8 +785,7 @@ void rpc_exit(struct rpc_task *task, int status)
{
task->tk_status = status;
task->tk_action = rpc_exit_task;
if (RPC_IS_QUEUED(task))
rpc_wake_up_queued_task(task->tk_waitqueue, task);
rpc_wake_up_queued_task(task->tk_waitqueue, task);
}
EXPORT_SYMBOL_GPL(rpc_exit);
@ -902,7 +902,10 @@ void rpc_execute(struct rpc_task *task)
/*
 * Work item handler: run the RPC state machine for the task that
 * embeds @work (as u.tk_work).
 *
 * The call to __rpc_execute() is bracketed by memalloc_nofs_save() /
 * memalloc_nofs_restore(), and the saved flags are put back before
 * returning.
 */
static void rpc_async_schedule(struct work_struct *work)
{
	struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work);
	unsigned int pflags;

	pflags = memalloc_nofs_save();
	__rpc_execute(task);
	memalloc_nofs_restore(pflags);
}
/**
@ -921,16 +924,13 @@ static void rpc_async_schedule(struct work_struct *work)
* Most requests are 'small' (under 2KiB) and can be serviced from a
* mempool, ensuring that NFS reads and writes can always proceed,
* and that there is good locality of reference for these buffers.
*
* In order to avoid memory starvation triggering more writebacks of
* NFS requests, we avoid using GFP_KERNEL.
*/
int rpc_malloc(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
struct rpc_buffer *buf;
gfp_t gfp = GFP_NOIO | __GFP_NOWARN;
gfp_t gfp = GFP_NOFS;
if (RPC_IS_SWAPPER(task))
gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@ -1011,7 +1011,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
static struct rpc_task *
rpc_alloc_task(void)
{
return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
}
/*
@ -1067,7 +1067,10 @@ static void rpc_free_task(struct rpc_task *task)
/*
 * Work item handler: free the rpc_task that embeds @work (as
 * u.tk_work).
 *
 * rpc_free_task() is called between memalloc_nofs_save() and
 * memalloc_nofs_restore(), so the caller's allocation flags are
 * restored once the task has been released.
 */
static void rpc_async_release(struct work_struct *work)
{
	struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work);
	unsigned int pflags;

	pflags = memalloc_nofs_save();
	rpc_free_task(task);
	memalloc_nofs_restore(pflags);
}
static void rpc_release_resources_task(struct rpc_task *task)

View File

@ -1144,17 +1144,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
#endif
/*
 * Set up the response header for TCP by appending a zero 32-bit word
 * to the head kvec of the reply buffer. The word is a placeholder for
 * the 4-byte record length field.
 */
static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
{
	/* tcp needs a space for the record length... */
	svc_putnl(&rqstp->rq_res.head[0], 0);
}
/*
* Common routine for processing the RPC request.
*/
@ -1182,10 +1171,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
clear_bit(RQ_DROPME, &rqstp->rq_flags);
/* Setup reply header */
if (rqstp->rq_prot == IPPROTO_TCP)
svc_tcp_prep_reply_hdr(rqstp);
svc_putu32(resv, rqstp->rq_xid);
vers = svc_getnl(argv);
@ -1443,6 +1428,10 @@ svc_process(struct svc_rqst *rqstp)
goto out_drop;
}
/* Reserve space for the record marker */
if (rqstp->rq_prot == IPPROTO_TCP)
svc_putnl(resv, 0);
/* Returns 1 for send, 0 for drop */
if (likely(svc_process_common(rqstp, argv, resv)))
return svc_send(rqstp);

View File

@ -16,6 +16,7 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/bvec.h>
#include <trace/events/sunrpc.h>
/*
* XDR functions for basic NFS types
@ -162,6 +163,15 @@ xdr_free_bvec(struct xdr_buf *buf)
buf->bvec = NULL;
}
/**
* xdr_inline_pages - Prepare receive buffer for a large reply
* @xdr: xdr_buf into which reply will be placed
* @offset: expected offset where data payload will start, in bytes
* @pages: vector of struct page pointers
* @base: offset in first page where receive should start, in bytes
* @len: expected size of the upper layer data payload, in bytes
*
*/
void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
struct page **pages, unsigned int base, unsigned int len)
@ -179,6 +189,8 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
tail->iov_base = buf + offset;
tail->iov_len = buflen - offset;
if ((xdr->page_len & 3) == 0)
tail->iov_len -= sizeof(__be32);
xdr->buflen += len;
}
@ -346,13 +358,15 @@ EXPORT_SYMBOL_GPL(_copy_from_pages);
* 'len' bytes. The extra data is not lost, but is instead
* moved into the inlined pages and/or the tail.
*/
static void
static unsigned int
xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
{
struct kvec *head, *tail;
size_t copy, offs;
unsigned int pglen = buf->page_len;
unsigned int result;
result = 0;
tail = buf->tail;
head = buf->head;
@ -366,6 +380,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
copy = tail->iov_len - len;
memmove((char *)tail->iov_base + len,
tail->iov_base, copy);
result += copy;
}
/* Copy from the inlined pages into the tail */
copy = len;
@ -376,11 +391,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
copy = 0;
else if (copy > tail->iov_len - offs)
copy = tail->iov_len - offs;
if (copy != 0)
if (copy != 0) {
_copy_from_pages((char *)tail->iov_base + offs,
buf->pages,
buf->page_base + pglen + offs - len,
copy);
result += copy;
}
/* Do we also need to copy data from the head into the tail ? */
if (len > pglen) {
offs = copy = len - pglen;
@ -390,6 +407,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
(char *)head->iov_base +
head->iov_len - offs,
copy);
result += copy;
}
}
/* Now handle pages */
@ -405,12 +423,15 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
_copy_to_pages(buf->pages, buf->page_base,
(char *)head->iov_base + head->iov_len - len,
copy);
result += copy;
}
head->iov_len -= len;
buf->buflen -= len;
/* Have we truncated the message? */
if (buf->len > buf->buflen)
buf->len = buf->buflen;
return result;
}
/**
@ -422,14 +443,16 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
* 'len' bytes. The extra data is not lost, but is instead
* moved into the tail.
*/
static void
static unsigned int
xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
{
struct kvec *tail;
size_t copy;
unsigned int pglen = buf->page_len;
unsigned int tailbuf_len;
unsigned int result;
result = 0;
tail = buf->tail;
BUG_ON (len > pglen);
@ -447,18 +470,22 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
if (tail->iov_len > len) {
char *p = (char *)tail->iov_base + len;
memmove(p, tail->iov_base, tail->iov_len - len);
result += tail->iov_len - len;
} else
copy = tail->iov_len;
/* Copy from the inlined pages into the tail */
_copy_from_pages((char *)tail->iov_base,
buf->pages, buf->page_base + pglen - len,
copy);
result += copy;
}
buf->page_len -= len;
buf->buflen -= len;
/* Have we truncated the message? */
if (buf->len > buf->buflen)
buf->len = buf->buflen;
return result;
}
void
@ -483,6 +510,7 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos);
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer in which to encode data
* @p: current pointer inside XDR buffer
* @rqst: pointer to controlling rpc_rqst, for debugging
*
* Note: at the moment the RPC client only passes the length of our
* scratch buffer in the xdr_buf's header kvec. Previously this
@ -491,7 +519,8 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos);
* of the buffer length, and takes care of adjusting the kvec
* length for us.
*/
void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
struct rpc_rqst *rqst)
{
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
@ -513,6 +542,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
buf->len += len;
iov->iov_len += len;
}
xdr->rqst = rqst;
}
EXPORT_SYMBOL_GPL(xdr_init_encode);
@ -551,9 +581,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
int frag1bytes, frag2bytes;
if (nbytes > PAGE_SIZE)
return NULL; /* Bigger buffers require special handling */
goto out_overflow; /* Bigger buffers require special handling */
if (xdr->buf->len + nbytes > xdr->buf->buflen)
return NULL; /* Sorry, we're totally out of space */
goto out_overflow; /* Sorry, we're totally out of space */
frag1bytes = (xdr->end - xdr->p) << 2;
frag2bytes = nbytes - frag1bytes;
if (xdr->iov)
@ -582,6 +612,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
out_overflow:
trace_rpc_xdr_overflow(xdr, nbytes);
return NULL;
}
/**
@ -819,8 +852,10 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer from which to decode data
* @p: current pointer inside XDR buffer
* @rqst: pointer to controlling rpc_rqst, for debugging
*/
void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
struct rpc_rqst *rqst)
{
xdr->buf = buf;
xdr->scratch.iov_base = NULL;
@ -836,6 +871,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
xdr->nwords -= p - xdr->p;
xdr->p = p;
}
xdr->rqst = rqst;
}
EXPORT_SYMBOL_GPL(xdr_init_decode);
@ -854,7 +890,7 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
buf->page_len = len;
buf->buflen = len;
buf->len = len;
xdr_init_decode(xdr, buf, NULL);
xdr_init_decode(xdr, buf, NULL, NULL);
}
EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
@ -896,20 +932,23 @@ static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
size_t cplen = (char *)xdr->end - (char *)xdr->p;
if (nbytes > xdr->scratch.iov_len)
return NULL;
goto out_overflow;
p = __xdr_inline_decode(xdr, cplen);
if (p == NULL)
return NULL;
memcpy(cpdest, p, cplen);
if (!xdr_set_next_buffer(xdr))
goto out_overflow;
cpdest += cplen;
nbytes -= cplen;
if (!xdr_set_next_buffer(xdr))
return NULL;
p = __xdr_inline_decode(xdr, nbytes);
if (p == NULL)
return NULL;
memcpy(cpdest, p, nbytes);
return xdr->scratch.iov_base;
out_overflow:
trace_rpc_xdr_overflow(xdr, nbytes);
return NULL;
}
/**
@ -926,14 +965,17 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p;
if (nbytes == 0)
if (unlikely(nbytes == 0))
return xdr->p;
if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
return NULL;
goto out_overflow;
p = __xdr_inline_decode(xdr, nbytes);
if (p != NULL)
return p;
return xdr_copy_to_scratch(xdr, nbytes);
out_overflow:
trace_rpc_xdr_overflow(xdr, nbytes);
return NULL;
}
EXPORT_SYMBOL_GPL(xdr_inline_decode);
@ -943,13 +985,17 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
struct kvec *iov;
unsigned int nwords = XDR_QUADLEN(len);
unsigned int cur = xdr_stream_pos(xdr);
unsigned int copied, offset;
if (xdr->nwords == 0)
return 0;
/* Realign pages to current pointer position */
iov = buf->head;
iov = buf->head;
if (iov->iov_len > cur) {
xdr_shrink_bufhead(buf, iov->iov_len - cur);
offset = iov->iov_len - cur;
copied = xdr_shrink_bufhead(buf, offset);
trace_rpc_xdr_alignment(xdr, offset, copied);
xdr->nwords = XDR_QUADLEN(buf->len - cur);
}
@ -961,7 +1007,9 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
len = buf->page_len;
else if (nwords < xdr->nwords) {
/* Truncate page data and move it into the tail */
xdr_shrink_pagelen(buf, buf->page_len - len);
offset = buf->page_len - len;
copied = xdr_shrink_pagelen(buf, offset);
trace_rpc_xdr_alignment(xdr, offset, copied);
xdr->nwords = XDR_QUADLEN(buf->len - cur);
}
return len;
@ -1102,47 +1150,6 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
}
EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
/**
 * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
 * @buf: buf to be trimmed
 * @len: number of bytes to reduce "buf" by
 *
 * Only the length fields are adjusted; no data is moved. Bytes are
 * taken from the tail kvec first, then from the page data, and finally
 * from the head kvec. Each section gives up no more than it currently
 * holds, so fewer than @len bytes may be removed when the xdr_buf is
 * too small. @buf->len is reduced by the number of bytes actually
 * trimmed.
 */
void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
{
	unsigned int remaining = len;
	size_t chunk;

	/* Tail first ... */
	chunk = min_t(size_t, buf->tail[0].iov_len, remaining);
	buf->tail[0].iov_len -= chunk;
	remaining -= chunk;

	/* ... then the pages, if anything is still owed ... */
	if (remaining) {
		chunk = min_t(unsigned int, buf->page_len, remaining);
		buf->page_len -= chunk;
		remaining -= chunk;
	}

	/* ... and the head as a last resort. */
	if (remaining) {
		chunk = min_t(size_t, buf->head[0].iov_len, remaining);
		buf->head[0].iov_len -= chunk;
		remaining -= chunk;
	}

	/* Account only for what was really removed. */
	buf->len -= len - remaining;
}
EXPORT_SYMBOL_GPL(xdr_buf_trim);
static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
{
unsigned int this_len;

View File

@ -49,6 +49,7 @@
#include <linux/sunrpc/metrics.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/rcupdate.h>
#include <linux/sched/mm.h>
#include <trace/events/sunrpc.h>
@ -643,11 +644,13 @@ static void xprt_autoclose(struct work_struct *work)
{
struct rpc_xprt *xprt =
container_of(work, struct rpc_xprt, task_cleanup);
unsigned int pflags = memalloc_nofs_save();
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
xprt->ops->close(xprt);
xprt_release_write(xprt, NULL);
wake_up_bit(&xprt->state, XPRT_LOCKED);
memalloc_nofs_restore(pflags);
}
/**
@ -1165,6 +1168,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
/* Note: req is added _before_ pos */
list_add_tail(&req->rq_xmit, &pos->rq_xmit);
INIT_LIST_HEAD(&req->rq_xmit2);
trace_xprt_enq_xmit(task, 1);
goto out;
}
} else if (RPC_IS_SWAPPER(task)) {
@ -1176,6 +1180,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
/* Note: req is added _before_ pos */
list_add_tail(&req->rq_xmit, &pos->rq_xmit);
INIT_LIST_HEAD(&req->rq_xmit2);
trace_xprt_enq_xmit(task, 2);
goto out;
}
} else if (!req->rq_seqno) {
@ -1184,11 +1189,13 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
continue;
list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
INIT_LIST_HEAD(&req->rq_xmit);
trace_xprt_enq_xmit(task, 3);
goto out;
}
}
list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
INIT_LIST_HEAD(&req->rq_xmit2);
trace_xprt_enq_xmit(task, 4);
out:
set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
@ -1313,8 +1320,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
int is_retrans = RPC_WAS_SENT(task);
int status;
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
if (!req->rq_bytes_sent) {
if (xprt_request_data_received(task)) {
status = 0;
@ -1325,6 +1330,13 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
status = -EBADMSG;
goto out_dequeue;
}
if (task->tk_ops->rpc_call_prepare_transmit) {
task->tk_ops->rpc_call_prepare_transmit(task,
task->tk_calldata);
status = task->tk_status;
if (status < 0)
goto out_dequeue;
}
}
/*
@ -1336,9 +1348,9 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
connect_cookie = xprt->connect_cookie;
status = xprt->ops->send_request(req);
trace_xprt_transmit(xprt, req->rq_xid, status);
if (status != 0) {
req->rq_ntrans--;
trace_xprt_transmit(req, status);
return status;
}
@ -1347,7 +1359,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
xprt_inject_disconnect(xprt);
dprintk("RPC: %5u xmit complete\n", task->tk_pid);
task->tk_flags |= RPC_TASK_SENT;
spin_lock_bh(&xprt->transport_lock);
@ -1360,6 +1371,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
req->rq_connect_cookie = connect_cookie;
out_dequeue:
trace_xprt_transmit(req, status);
xprt_request_dequeue_transmit(task);
rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
return status;
@ -1599,7 +1611,6 @@ xprt_request_init(struct rpc_task *task)
req->rq_buffer = NULL;
req->rq_xid = xprt_alloc_xid(xprt);
xprt_init_connect_cookie(req, xprt);
req->rq_bytes_sent = 0;
req->rq_snd_buf.len = 0;
req->rq_snd_buf.buflen = 0;
req->rq_rcv_buf.len = 0;
@ -1721,6 +1732,7 @@ void xprt_release(struct rpc_task *task)
xprt->ops->buf_free(task);
xprt_inject_disconnect(xprt);
xdr_free_bvec(&req->rq_rcv_buf);
xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
task->tk_rqstp = NULL;
@ -1749,7 +1761,6 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
*/
xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
xbufp->tail[0].iov_len;
req->rq_bytes_sent = 0;
}
#endif

View File

@ -123,7 +123,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base);
req->rl_rdmabuf->rg_base, rqst);
p = xdr_reserve_space(&req->rl_stream, 28);
if (unlikely(!p))
@ -267,7 +267,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
rqst->rq_bytes_sent = 0;
rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;

View File

@ -391,7 +391,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
*/
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, u32 xid,
int nsegs, bool writing, __be32 xid,
struct rpcrdma_mr **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@ -446,7 +446,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
goto out_mapmr_err;
ibmr->iova &= 0x00000000ffffffff;
ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32;
ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32;
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);

View File

@ -164,6 +164,21 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
}
/* The client is required to provide a Reply chunk if the maximum
* size of the non-payload part of the RPC Reply is larger than
* the inline threshold.
*/
static bool
rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
const struct rpc_rqst *rqst)
{
const struct xdr_buf *buf = &rqst->rq_rcv_buf;
const struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return buf->head[0].iov_len + buf->tail[0].iov_len <
ia->ri_max_inline_read;
}
/* Split @vec on page boundaries into SGEs. FMR registers pages, not
* a byte range. Other modes coalesce these SGEs into a single MR
* when they can.
@ -733,7 +748,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base);
req->rl_rdmabuf->rg_base, rqst);
/* Fixed header fields */
ret = -EMSGSIZE;
@ -762,7 +777,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
*/
if (rpcrdma_results_inline(r_xprt, rqst))
wtype = rpcrdma_noch;
else if (ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ)
else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
rpcrdma_nonpayload_inline(r_xprt, rqst))
wtype = rpcrdma_writech;
else
wtype = rpcrdma_replych;
@ -1313,7 +1329,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
/* Fixed transport header fields */
xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
rep->rr_hdrbuf.head[0].iov_base);
rep->rr_hdrbuf.head[0].iov_base, NULL);
p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p))
goto out_shortreply;

View File

@ -304,7 +304,6 @@ xprt_setup_rdma_bc(struct xprt_create *args)
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->prot = XPRT_TRANSPORT_BC_RDMA;
xprt->tsh_size = 0;
xprt->ops = &xprt_rdma_bc_procs;
memcpy(&xprt->addr, args->dstaddr, args->addrlen);

View File

@ -332,7 +332,6 @@ xprt_setup_rdma(struct xprt_create *args)
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
xprt->ops = &xprt_rdma_procs;
/*
@ -738,7 +737,6 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
goto drop_connection;
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
/* An RPC with no reply will throw off credit accounting,
* so drop the connection to reset the credit grant.

View File

@ -1481,6 +1481,8 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
if (ep->rep_receive_count > needed)
goto out;
needed -= ep->rep_receive_count;
if (!temp)
needed += RPCRDMA_MAX_RECV_BATCH;
count = 0;
wr = NULL;

View File

@ -205,6 +205,16 @@ struct rpcrdma_rep {
struct ib_recv_wr rr_recv_wr;
};
/* To reduce the rate at which a transport invokes ib_post_recv
 * (and thus the hardware doorbell rate), xprtrdma posts Receive
 * WRs in batches.
 *
 * rpcrdma_post_recvs() adds RPCRDMA_MAX_RECV_BATCH to the number of
 * Receives it needs whenever its @temp argument is false.
 *
 * Setting this to zero disables Receive post batching.
 */
enum {
RPCRDMA_MAX_RECV_BATCH = 7,
};
/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
*/
struct rpcrdma_req;
@ -577,7 +587,7 @@ void frwr_release_mr(struct rpcrdma_mr *mr);
size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, u32 xid,
int nsegs, bool writing, __be32 xid,
struct rpcrdma_mr **mr);
int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);

View File

@ -50,6 +50,7 @@
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <trace/events/sunrpc.h>
@ -404,8 +405,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
size_t want, seek_init = seek, offset = 0;
ssize_t ret;
if (seek < buf->head[0].iov_len) {
want = min_t(size_t, count, buf->head[0].iov_len);
want = min_t(size_t, count, buf->head[0].iov_len);
if (seek < want) {
ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
if (ret <= 0)
goto sock_err;
@ -416,13 +417,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
goto out;
seek = 0;
} else {
seek -= buf->head[0].iov_len;
offset += buf->head[0].iov_len;
seek -= want;
offset += want;
}
want = xs_alloc_sparse_pages(buf,
min_t(size_t, count - offset, buf->page_len),
GFP_NOWAIT);
GFP_KERNEL);
if (seek < want) {
ret = xs_read_bvec(sock, msg, flags, buf->bvec,
xdr_buf_pagecount(buf),
@ -442,8 +443,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
offset += want;
}
if (seek < buf->tail[0].iov_len) {
want = min_t(size_t, count - offset, buf->tail[0].iov_len);
want = min_t(size_t, count - offset, buf->tail[0].iov_len);
if (seek < want) {
ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
if (ret <= 0)
goto sock_err;
@ -453,7 +454,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (ret != want)
goto out;
} else
offset += buf->tail[0].iov_len;
offset = seek_init;
ret = -EMSGSIZE;
out:
*read = offset - seek_init;
@ -481,6 +482,14 @@ xs_read_stream_request_done(struct sock_xprt *transport)
return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
}
/*
 * Set MSG_EOR in msg->msg_flags when xs_read_stream_request_done()
 * is true, i.e. when transport->recv.fraghdr has
 * RPC_LAST_STREAM_FRAGMENT set. Otherwise @msg is left untouched.
 *
 * This helper does not look at how much of the fragment has been
 * read; xs_read_stream_request() only calls it once recv.offset
 * has reached recv.len.
 */
static void
xs_read_stream_check_eor(struct sock_xprt *transport,
struct msghdr *msg)
{
if (xs_read_stream_request_done(transport))
msg->msg_flags |= MSG_EOR;
}
static ssize_t
xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
int flags, struct rpc_rqst *req)
@ -492,17 +501,21 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
xs_read_header(transport, buf);
want = transport->recv.len - transport->recv.offset;
ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
transport->recv.copied + want, transport->recv.copied,
&read);
transport->recv.offset += read;
transport->recv.copied += read;
if (transport->recv.offset == transport->recv.len) {
if (xs_read_stream_request_done(transport))
msg->msg_flags |= MSG_EOR;
return read;
if (want != 0) {
ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
transport->recv.copied + want,
transport->recv.copied,
&read);
transport->recv.offset += read;
transport->recv.copied += read;
}
if (transport->recv.offset == transport->recv.len)
xs_read_stream_check_eor(transport, msg);
if (want == 0)
return 0;
switch (ret) {
default:
break;
@ -655,13 +668,35 @@ xs_read_stream(struct sock_xprt *transport, int flags)
return ret != 0 ? ret : -ESHUTDOWN;
}
/*
 * Query the socket's current poll state by calling its ->poll()
 * operation directly with transport->file and transport->sock.
 * Returns the event mask produced by ->poll().
 *
 * transport->sock is dereferenced without a NULL check.
 *
 * NOTE(review): the NULL third argument presumably means no poll
 * table is supplied, so nothing is registered for wakeup - confirm
 * against the proto_ops ->poll() contract.
 */
static __poll_t xs_poll_socket(struct sock_xprt *transport)
{
return transport->sock->ops->poll(transport->file, transport->sock,
NULL);
}
/*
 * Returns true when xs_poll_socket() reports EPOLLIN or EPOLLRDNORM
 * and does not report EPOLLRDHUP.
 */
static bool xs_poll_socket_readable(struct sock_xprt *transport)
{
__poll_t events = xs_poll_socket(transport);
return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP);
}
/*
 * Clear XPRT_SOCK_DATA_READY, then re-check the socket. If it is
 * still readable, set the bit again and queue transport->recv_worker
 * on xprtiod_workqueue.
 *
 * Called at the end of xs_stream_data_receive() and
 * xs_udp_data_receive(), in both cases before recv_mutex is dropped.
 */
static void xs_poll_check_readable(struct sock_xprt *transport)
{
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
if (!xs_poll_socket_readable(transport))
return;
/* Queue the work only if this call flipped the bit from clear to set */
if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
queue_work(xprtiod_workqueue, &transport->recv_worker);
}
static void xs_stream_data_receive(struct sock_xprt *transport)
{
size_t read = 0;
ssize_t ret = 0;
mutex_lock(&transport->recv_mutex);
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
if (transport->sock == NULL)
goto out;
for (;;) {
@ -671,6 +706,10 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
read += ret;
cond_resched();
}
if (ret == -ESHUTDOWN)
kernel_sock_shutdown(transport->sock, SHUT_RDWR);
else
xs_poll_check_readable(transport);
out:
mutex_unlock(&transport->recv_mutex);
trace_xs_stream_read_data(&transport->xprt, ret, read);
@ -680,7 +719,10 @@ static void xs_stream_data_receive_workfn(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, recv_worker);
unsigned int pflags = memalloc_nofs_save();
xs_stream_data_receive(transport);
memalloc_nofs_restore(pflags);
}
static void
@ -690,65 +732,65 @@ xs_stream_reset_connect(struct sock_xprt *transport)
transport->recv.len = 0;
transport->recv.copied = 0;
transport->xmit.offset = 0;
}
/*
 * Account for the start of a connection attempt: increment the
 * transport's connect_count statistic and record the current
 * jiffies value in connect_start.
 */
static void
xs_stream_start_connect(struct sock_xprt *transport)
{
transport->xprt.stat.connect_count++;
transport->xprt.stat.connect_start = jiffies;
}
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek)
{
struct msghdr msg = {
.msg_name = addr,
.msg_namelen = addrlen,
.msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
};
struct kvec iov = {
.iov_base = vec->iov_base + base,
.iov_len = vec->iov_len - base,
};
if (iov.iov_len != 0)
return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
return kernel_sendmsg(sock, &msg, NULL, 0, 0);
if (seek)
iov_iter_advance(&msg->msg_iter, seek);
return sock_sendmsg(sock, msg);
}
static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
/*
 * Send a single kvec on @sock.
 *
 * Points msg->msg_iter at @vec (one segment of vec->iov_len bytes,
 * WRITE direction) and hands off to xs_sendmsg(), passing @seek
 * through unchanged. Returns whatever xs_sendmsg() returns.
 */
static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek)
{
iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
return xs_sendmsg(sock, msg, seek);
}
static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base)
{
ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
int offset, size_t size, int flags);
struct page **ppage;
unsigned int remainder;
int err;
remainder = xdr->page_len - base;
base += xdr->page_base;
ppage = xdr->pages + (base >> PAGE_SHIFT);
base &= ~PAGE_MASK;
do_sendpage = sock->ops->sendpage;
if (!zerocopy)
do_sendpage = sock_no_sendpage;
for(;;) {
unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
int flags = XS_SENDMSG_FLAGS;
err = xdr_alloc_bvec(xdr, GFP_KERNEL);
if (err < 0)
return err;
remainder -= len;
if (more)
flags |= MSG_MORE;
if (remainder != 0)
flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE;
err = do_sendpage(sock, *ppage, base, len, flags);
if (remainder == 0 || err != len)
break;
*sent_p += err;
ppage++;
base = 0;
}
if (err > 0) {
*sent_p += err;
err = 0;
}
return err;
iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
xdr_buf_pagecount(xdr),
xdr->page_len + xdr->page_base);
return xs_sendmsg(sock, msg, base + xdr->page_base);
}
#define xs_record_marker_len() sizeof(rpc_fraghdr)
/* Common case:
 * - stream transport
 * - sending from byte 0 of the message
 * - the message is wholly contained in @xdr's head iovec
 *
 * Sends the record marker followed by @vec through a single
 * xs_sendmsg() call, using a two-element kvec iterator.
 *
 * @base is passed to xs_sendmsg() as the seek offset into that
 * two-element iterator, so it is counted from the first byte of
 * the marker rather than from the start of @vec.
 *
 * Returns whatever xs_sendmsg() returns.
 */
static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
rpc_fraghdr marker, struct kvec *vec, size_t base)
{
/* @marker is passed by value, so iov[0] points at this function's copy */
struct kvec iov[2] = {
[0] = {
.iov_base = &marker,
.iov_len = sizeof(marker)
},
[1] = *vec,
};
/* Total bytes covered by the iterator: marker plus the whole of @vec */
size_t len = iov[0].iov_len + iov[1].iov_len;
iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
return xs_sendmsg(sock, msg, base);
}
/**
@ -758,49 +800,60 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
* @addrlen: UDP only -- length of destination address
* @xdr: buffer containing this request
* @base: starting position in the buffer
* @zerocopy: true if it is safe to use sendpage()
* @rm: stream record marker field
* @sent_p: return the total number of bytes successfully queued for sending
*
*/
static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p)
{
unsigned int remainder = xdr->len - base;
struct msghdr msg = {
.msg_name = addr,
.msg_namelen = addrlen,
.msg_flags = XS_SENDMSG_FLAGS | MSG_MORE,
};
unsigned int rmsize = rm ? sizeof(rm) : 0;
unsigned int remainder = rmsize + xdr->len - base;
unsigned int want;
int err = 0;
int sent = 0;
if (unlikely(!sock))
return -ENOTSOCK;
if (base != 0) {
addr = NULL;
addrlen = 0;
}
if (base < xdr->head[0].iov_len || addr != NULL) {
unsigned int len = xdr->head[0].iov_len - base;
want = xdr->head[0].iov_len + rmsize;
if (base < want) {
unsigned int len = want - base;
remainder -= len;
err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
if (remainder == 0)
msg.msg_flags &= ~MSG_MORE;
if (rmsize)
err = xs_send_rm_and_kvec(sock, &msg, rm,
&xdr->head[0], base);
else
err = xs_send_kvec(sock, &msg, &xdr->head[0], base);
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
base = 0;
} else
base -= xdr->head[0].iov_len;
base -= want;
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
*sent_p += sent;
if (remainder == 0 || sent != len)
if (remainder == 0)
msg.msg_flags &= ~MSG_MORE;
err = xs_send_pagedata(sock, &msg, xdr, base);
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
base = 0;
} else
base -= xdr->page_len;
if (base >= xdr->tail[0].iov_len)
return 0;
err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
msg.msg_flags &= ~MSG_MORE;
err = xs_send_kvec(sock, &msg, &xdr->tail[0], base);
out:
if (err > 0) {
*sent_p += err;
@ -856,7 +909,7 @@ static int xs_nospace(struct rpc_rqst *req)
static void
xs_stream_prepare_request(struct rpc_rqst *req)
{
req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO);
req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
}
/*
@ -870,13 +923,14 @@ xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
}
/*
* Construct a stream transport record marker in @buf.
* Return the stream record marker field for a record of length < 2^31-1
*/
static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
static rpc_fraghdr
xs_stream_record_marker(struct xdr_buf *xdr)
{
u32 reclen = buf->len - sizeof(rpc_fraghdr);
rpc_fraghdr *base = buf->head[0].iov_base;
*base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
if (!xdr->len)
return 0;
return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len);
}
/**
@ -905,15 +959,14 @@ static int xs_local_send_request(struct rpc_rqst *req)
return -ENOTCONN;
}
xs_encode_stream_record_marker(&req->rq_snd_buf);
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr,
transport->xmit.offset,
true, &sent);
xs_stream_record_marker(xdr),
&sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - transport->xmit.offset, status);
@ -925,7 +978,6 @@ static int xs_local_send_request(struct rpc_rqst *req)
req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
req->rq_bytes_sent = 0;
transport->xmit.offset = 0;
return 0;
}
@ -981,7 +1033,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
xdr, 0, true, &sent);
xdr, 0, 0, &sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len, status);
@ -1045,7 +1097,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
bool zerocopy = true;
bool vm_wait = false;
int status;
int sent;
@ -1057,17 +1108,9 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
return -ENOTCONN;
}
xs_encode_stream_record_marker(&req->rq_snd_buf);
xs_pktdump("packet data:",
req->rq_svec->iov_base,
req->rq_svec->iov_len);
/* Don't use zero copy if this is a resend. If the RPC call
* completes while the socket holds a reference to the pages,
* then we may end up resending corrupted data.
*/
if (req->rq_task->tk_flags & RPC_TASK_SENT)
zerocopy = false;
if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
xs_tcp_set_socket_timeouts(xprt, transport->sock);
@ -1080,7 +1123,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
transport->xmit.offset,
zerocopy, &sent);
xs_stream_record_marker(xdr),
&sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - transport->xmit.offset, status);
@ -1091,7 +1135,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
req->rq_bytes_sent = 0;
transport->xmit.offset = 0;
return 0;
}
@ -1211,6 +1254,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
struct socket *sock = transport->sock;
struct sock *sk = transport->inet;
struct rpc_xprt *xprt = &transport->xprt;
struct file *filp = transport->file;
if (sk == NULL)
return;
@ -1224,6 +1268,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
transport->sock = NULL;
transport->file = NULL;
sk->sk_user_data = NULL;
@ -1231,10 +1276,12 @@ static void xs_reset_transport(struct sock_xprt *transport)
xprt_clear_connected(xprt);
write_unlock_bh(&sk->sk_callback_lock);
xs_sock_reset_connection_flags(xprt);
/* Reset stream record info */
xs_stream_reset_connect(transport);
mutex_unlock(&transport->recv_mutex);
trace_rpc_socket_close(xprt, sock);
sock_release(sock);
fput(filp);
xprt_disconnect_done(xprt);
}
@ -1358,7 +1405,6 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
int err;
mutex_lock(&transport->recv_mutex);
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
sk = transport->inet;
if (sk == NULL)
goto out;
@ -1370,6 +1416,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
consume_skb(skb);
cond_resched();
}
xs_poll_check_readable(transport);
out:
mutex_unlock(&transport->recv_mutex);
}
@ -1378,7 +1425,10 @@ static void xs_udp_data_receive_workfn(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, recv_worker);
unsigned int pflags = memalloc_nofs_save();
xs_udp_data_receive(transport);
memalloc_nofs_restore(pflags);
}
/**
@ -1826,6 +1876,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
struct sock_xprt *transport, int family, int type,
int protocol, bool reuseport)
{
struct file *filp;
struct socket *sock;
int err;
@ -1846,6 +1897,11 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp))
return ERR_CAST(filp);
transport->file = filp;
return sock;
out:
return ERR_PTR(err);
@ -1869,7 +1925,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_write_space = xs_udp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_NOIO;
xprt_clear_connected(xprt);
@ -1880,7 +1935,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
write_unlock_bh(&sk->sk_callback_lock);
}
xs_stream_reset_connect(transport);
xs_stream_start_connect(transport);
return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
}
@ -1892,6 +1947,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
static int xs_local_setup_socket(struct sock_xprt *transport)
{
struct rpc_xprt *xprt = &transport->xprt;
struct file *filp;
struct socket *sock;
int status = -EIO;
@ -1904,6 +1960,13 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
}
xs_reclassify_socket(AF_LOCAL, sock);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp)) {
status = PTR_ERR(filp);
goto out;
}
transport->file = filp;
dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
@ -2057,7 +2120,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
sk->sk_allocation = GFP_NOIO;
xprt_set_connected(xprt);
@ -2220,7 +2282,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_write_space = xs_tcp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_NOIO;
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
@ -2240,8 +2301,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
xs_set_memalloc(xprt);
/* Reset TCP record info */
xs_stream_reset_connect(transport);
xs_stream_start_connect(transport);
/* Tell the socket layer to start connecting... */
set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
@ -2534,26 +2594,35 @@ static int bc_sendto(struct rpc_rqst *req)
{
int len;
struct xdr_buf *xbufp = &req->rq_snd_buf;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport =
container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
container_of(req->rq_xprt, struct sock_xprt, xprt);
unsigned long headoff;
unsigned long tailoff;
struct page *tailpage;
struct msghdr msg = {
.msg_flags = MSG_MORE
};
rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
(u32)xbufp->len);
struct kvec iov = {
.iov_base = &marker,
.iov_len = sizeof(marker),
};
xs_encode_stream_record_marker(xbufp);
len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len);
if (len != iov.iov_len)
return -EAGAIN;
tailpage = NULL;
if (xbufp->tail[0].iov_len)
tailpage = virt_to_page(xbufp->tail[0].iov_base);
tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
len = svc_send_common(sock, xbufp,
len = svc_send_common(transport->sock, xbufp,
virt_to_page(xbufp->head[0].iov_base), headoff,
xbufp->tail[0].iov_base, tailoff);
if (len != xbufp->len) {
printk(KERN_NOTICE "Error sending entire callback!\n");
len = -EAGAIN;
}
tailpage, tailoff);
if (len != xbufp->len)
return -EAGAIN;
return len;
}
@ -2793,7 +2862,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = 0;
xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->bind_timeout = XS_BIND_TO;
@ -2862,7 +2930,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_UDP;
xprt->tsh_size = 0;
/* XXX: header size can vary due to auth type, IPv6, etc. */
xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
@ -2942,7 +3009,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_TCP;
xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->bind_timeout = XS_BIND_TO;
@ -3015,7 +3081,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_TCP;
xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->timeout = &xs_tcp_default_timeout;