orangefs: don't bother with splitting iovecs

copy_page_{to,from}_iter() advances it just fine *and* it has no
problem with partially consumed segments.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
This commit is contained in:
Al Viro 2015-10-08 18:17:26 -04:00 committed by Mike Marshall
parent 3c2fcfcb68
commit dc4067f671
1 changed files with 6 additions and 276 deletions

View File

@ -256,168 +256,6 @@ static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode,
return ret;
}
/*
 * split_iovecs - re-slice an iovec array into groups of
 * pvfs_bufmap_size_query() bytes.
 *
 * The input array is walked in order while keeping a running byte count for
 * the current group.  A segment that fits strictly below the size limit is
 * copied through unchanged.  A segment that would reach or cross the limit
 * is cut: the part that fills the group exactly up to the limit is emitted,
 * the group is closed, and the remainder of that same segment is
 * reconsidered as the first candidate of the next group.  A segment that
 * ends exactly on the limit therefore leaves a zero-length remainder, which
 * is emitted as the first entry of the following group.
 *
 * @max_new_nr_segs: capacity (in entries) of both output arrays; computed by
 *                   the caller.  Running past it is reported as -EINVAL.
 * @nr_segs:         number of entries in @original_iovec.
 * @original_iovec:  input array; it is never modified, a private copy is
 *                   sliced instead.
 * @new_nr_segs:     OUT - number of entries written to *@new_vec.
 * @new_vec:         OUT - the split iovec array; freed by the caller.
 * @seg_count:       OUT - number of groups, including the final one.
 * @seg_array:       OUT - per-group count of *@new_vec entries; freed by
 *                   the caller.
 *
 * Returns 0 on success, -EINVAL on bad arguments or when the output capacity
 * is exceeded, -ENOMEM when an allocation fails.  On any failure after
 * argument validation the OUT parameters hold 0 / NULL and nothing is left
 * allocated.
 */
static int split_iovecs(unsigned long max_new_nr_segs,		/* IN */
			unsigned long nr_segs,			/* IN */
			const struct iovec *original_iovec,	/* IN */
			unsigned long *new_nr_segs,		/* OUT */
			struct iovec **new_vec,			/* OUT */
			unsigned long *seg_count,		/* OUT */
			unsigned long **seg_array)		/* OUT */
{
	struct iovec *scratch;		/* mutable copy of the caller's array */
	struct iovec *out_vec;		/* split result */
	unsigned long *group_len;	/* entries of out_vec per group */
	unsigned long out_used = 0;	/* entries of out_vec filled so far */
	unsigned long group = 0;	/* index of the group being filled */
	unsigned long filled = 0;	/* bytes already in the current group */
	unsigned long idx = 0;		/* input segment under consideration */
	int err;

	if (nr_segs == 0 || max_new_nr_segs == 0 ||
	    !original_iovec || !new_nr_segs || !new_vec ||
	    !seg_count || !seg_array) {
		gossip_err("Invalid parameters to split_iovecs\n");
		return -EINVAL;
	}

	/* Give the caller well-defined outputs on every later failure. */
	*new_nr_segs = 0;
	*new_vec = NULL;
	*seg_count = 0;
	*seg_array = NULL;

	scratch = kmalloc_array(nr_segs, sizeof(*scratch),
				PVFS2_BUFMAP_GFP_FLAGS);
	if (!scratch) {
		gossip_err(
		    "split_iovecs: Could not allocate memory for %lu bytes!\n",
		    (unsigned long)(nr_segs * sizeof(*scratch)));
		return -ENOMEM;
	}

	out_vec = kcalloc(max_new_nr_segs, sizeof(*out_vec),
			  PVFS2_BUFMAP_GFP_FLAGS);
	if (!out_vec) {
		gossip_err(
		    "split_iovecs: Could not allocate memory for %lu bytes!\n",
		    (unsigned long)(max_new_nr_segs * sizeof(*out_vec)));
		err = -ENOMEM;
		goto free_scratch;
	}

	/* Zero-filled so each group's counter can simply be incremented. */
	group_len = kcalloc(max_new_nr_segs, sizeof(*group_len),
			    PVFS2_BUFMAP_GFP_FLAGS);
	if (!group_len) {
		gossip_err(
		    "split_iovecs: Could not allocate memory for %lu bytes!\n",
		    (unsigned long)(max_new_nr_segs * sizeof(*group_len)));
		err = -ENOMEM;
		goto free_out_vec;
	}

	/* Slicing edits base/len in place, so work on the private copy. */
	memcpy(scratch, original_iovec, nr_segs * sizeof(*scratch));

	while (idx < nr_segs) {
		struct iovec *cur = &scratch[idx];

		if (out_used >= max_new_nr_segs || group >= max_new_nr_segs) {
			gossip_err
			    ("split_iovecs: exceeded the index limit (%lu)\n",
			     out_used);
			err = -EINVAL;
			goto free_group_len;
		}

		if (filled + cur->iov_len < pvfs_bufmap_size_query()) {
			/* Fits below the limit: pass through and move on. */
			filled += cur->iov_len;
			out_vec[out_used] = *cur;
			out_used++;
			group_len[group]++;
			idx++;
			continue;
		}

		/*
		 * Reaches or crosses the limit: emit just enough to top the
		 * group up, close the group, and leave the remainder in
		 * scratch[idx] for the next pass (idx is NOT advanced).
		 */
		out_vec[out_used].iov_base = cur->iov_base;
		out_vec[out_used].iov_len =
			(pvfs_bufmap_size_query() - filled);
		out_used++;
		group_len[group]++;
		group++;
		cur->iov_base += (pvfs_bufmap_size_query() - filled);
		cur->iov_len -= (pvfs_bufmap_size_query() - filled);
		filled = 0;
	}

	/* Account for the last (possibly partial) group. */
	group++;

	*new_nr_segs = out_used;
	/* out_vec is freed by the caller */
	*new_vec = out_vec;
	*seg_count = group;
	/* group_len is also freed by the caller */
	*seg_array = group_len;
	kfree(scratch);
	return 0;

free_group_len:
	kfree(group_len);
free_out_vec:
	kfree(out_vec);
free_scratch:
	kfree(scratch);
	return err;
}
/*
 * bound_max_iovecs - total up an iovec array and estimate how many entries
 * it may need once split at pvfs_bufmap_size_query() boundaries.
 *
 * @curr:        iovec array to examine.
 * @nr_segs:     number of entries in @curr.
 * @total_count: OUT - sum of all iov_len values; written only on success.
 *
 * A segment that keeps the running group size strictly below the limit
 * counts as one entry.  A segment that reaches or crosses the limit counts
 * as two entries plus one more for every whole limit-sized chunk of the
 * overshoot.  The overshoot itself is carried forward as the running size,
 * not reduced modulo the limit.
 *
 * Returns the estimated entry count, or -EINVAL if the running total or any
 * single iov_len is negative when viewed as ssize_t.
 */
static long bound_max_iovecs(const struct iovec *curr, unsigned long nr_segs,
			     ssize_t *total_count)
{
	ssize_t bytes = 0;	/* sum of every iov_len seen so far */
	ssize_t pending = 0;	/* running size of the group being filled */
	long bound = 0;		/* estimated number of split entries */
	unsigned long n;

	for (n = 0; n < nr_segs; n++) {
		const struct iovec *seg = curr + n;

		bytes += seg->iov_len;
		/* A set sign bit means an oversized length or a wrapped sum. */
		if (unlikely((ssize_t) (bytes | seg->iov_len) < 0))
			return -EINVAL;

		if (pending + seg->iov_len >= pvfs_bufmap_size_query()) {
			pending =
			    (pending + seg->iov_len - pvfs_bufmap_size_query());
			bound += (pending / pvfs_bufmap_size_query() + 2);
			continue;
		}

		pending += seg->iov_len;
		bound++;
	}

	*total_count = bytes;
	return bound;
}
/*
* Common entry point for read/write/readv/writev
* This function will dispatch it to either the direct I/O
@ -431,25 +269,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
struct inode *inode = file->f_mapping->host;
struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle;
ssize_t ret;
ssize_t total_count;
unsigned int to_free;
size_t count;
unsigned long seg;
unsigned long new_nr_segs;
unsigned long max_new_nr_segs;
unsigned long seg_count;
unsigned long *seg_array;
struct iovec *iovecptr;
struct iovec *ptr;
total_count = 0;
ret = -EINVAL;
count = 0;
to_free = 0;
/* Compute total and max number of segments after split */
max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count);
struct iov_iter iter;
size_t count = iov_length(iov, nr_segs);
ssize_t total_count = 0;
ssize_t ret = -EINVAL;
gossip_debug(GOSSIP_FILE_DEBUG,
"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
@ -472,93 +295,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
goto out;
}
/*
* if the total size of data transfer requested is greater than
* the kernel-set blocksize of PVFS2, then we split the iovecs
* such that no iovec description straddles a block size limit
*/
iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE,
iov, nr_segs, count);
gossip_debug(GOSSIP_FILE_DEBUG,
"%s: pvfs_bufmap_size:%d\n",
__func__,
pvfs_bufmap_size_query());
if (count > pvfs_bufmap_size_query()) {
/*
* Split up the given iovec description such that
* no iovec descriptor straddles over the block-size limitation.
* This makes it easier for us to stage the I/O.
* In addition, this function will also compute an array
* with seg_count entries that will store the number of
* segments that straddle the block-size boundaries.
*/
ret = split_iovecs(max_new_nr_segs, /* IN */
nr_segs, /* IN */
iov, /* IN */
&new_nr_segs, /* OUT */
&iovecptr, /* OUT */
&seg_count, /* OUT */
&seg_array); /* OUT */
if (ret < 0) {
gossip_err("%s: Failed to split iovecs to satisfy larger than blocksize readv/writev request %zd\n",
__func__,
ret);
goto out;
}
gossip_debug(GOSSIP_FILE_DEBUG,
"%s: Splitting iovecs from %lu to %lu"
" [max_new %lu]\n",
__func__,
nr_segs,
new_nr_segs,
max_new_nr_segs);
/* We must free seg_array and iovecptr */
to_free = 1;
} else {
new_nr_segs = nr_segs;
/* use the given iovec description */
iovecptr = (struct iovec *)iov;
/* There is only 1 element in the seg_array */
seg_count = 1;
/* and its value is the number of segments passed in */
seg_array = &nr_segs;
/* We don't have to free up anything */
to_free = 0;
}
ptr = iovecptr;
gossip_debug(GOSSIP_FILE_DEBUG,
"%s(%pU) %zd@%llu\n",
__func__,
handle,
count,
llu(*offset));
gossip_debug(GOSSIP_FILE_DEBUG,
"%s(%pU): new_nr_segs: %lu, seg_count: %lu\n",
__func__,
handle,
new_nr_segs, seg_count);
/* PVFS2_KERNEL_DEBUG is a CFLAGS define. */
#ifdef PVFS2_KERNEL_DEBUG
for (seg = 0; seg < new_nr_segs; seg++)
gossip_debug(GOSSIP_FILE_DEBUG,
"%s: %d) %p to %p [%d bytes]\n",
__func__,
(int)seg + 1,
iovecptr[seg].iov_base,
iovecptr[seg].iov_base + iovecptr[seg].iov_len,
(int)iovecptr[seg].iov_len);
for (seg = 0; seg < seg_count; seg++)
gossip_debug(GOSSIP_FILE_DEBUG,
"%s: %zd) %lu\n",
__func__,
seg + 1,
seg_array[seg]);
#endif
seg = 0;
while (total_count < count) {
struct iov_iter iter;
size_t each_count;
size_t amt_complete;
@ -579,9 +319,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
handle,
(int)*offset);
iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE,
ptr, seg_array[seg], each_count);
ret = wait_for_direct_io(type, inode, offset, &iter,
each_count, 0);
gossip_debug(GOSSIP_FILE_DEBUG,
@ -593,9 +330,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
if (ret < 0)
goto out;
/* advance the iovec pointer */
ptr += seg_array[seg];
seg++;
*offset += ret;
total_count += ret;
amt_complete = ret;
@ -617,10 +351,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
if (total_count > 0)
ret = total_count;
out:
if (to_free) {
kfree(iovecptr);
kfree(seg_array);
}
if (ret > 0) {
if (type == PVFS_IO_READ) {
file_accessed(file);