diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c index 34e2240f1d29..e37b6479a6a1 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devpvfs2-req.c @@ -104,110 +104,137 @@ static ssize_t pvfs2_devreq_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - int ret = 0; - ssize_t len = 0; - struct pvfs2_kernel_op_s *cur_op = NULL; - static __s32 magic = PVFS2_DEVREQ_MAGIC; + struct pvfs2_kernel_op_s *op, *temp; __s32 proto_ver = PVFS_KERNEL_PROTO_VERSION; + static __s32 magic = PVFS2_DEVREQ_MAGIC; + struct pvfs2_kernel_op_s *cur_op = NULL; + unsigned long ret; + /* We do not support blocking IO. */ if (!(file->f_flags & O_NONBLOCK)) { - /* We do not support blocking reads/opens any more */ - gossip_err("pvfs2: blocking reads are not supported! (pvfs2-client-core bug)\n"); + gossip_err("orangefs: blocking reads are not supported! (pvfs2-client-core bug)\n"); return -EINVAL; - } else { - struct pvfs2_kernel_op_s *op = NULL, *temp = NULL; - /* get next op (if any) from top of list */ - spin_lock(&pvfs2_request_list_lock); - list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) { - __s32 fsid = fsid_of_op(op); - /* - * Check if this op's fsid is known and needs - * remounting - */ - if (fsid != PVFS_FS_ID_NULL && - fs_mount_pending(fsid) == 1) { + } + + /* + * The client will do an ioctl to find MAX_ALIGNED_DEV_REQ_UPSIZE, then + * always read with that size buffer. + */ + if (count != MAX_ALIGNED_DEV_REQ_UPSIZE) { + gossip_err("orangefs: client-core tried to read wrong size\n"); + return -EINVAL; + } + + /* Get next op (if any) from top of list. */ + spin_lock(&pvfs2_request_list_lock); + list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) { + __s32 fsid; + /* This lock is held past the end of the loop when we break. */ + spin_lock(&op->lock); + + fsid = fsid_of_op(op); + if (fsid != PVFS_FS_ID_NULL) { + int ret; + /* Skip ops whose filesystem needs to be mounted. */ + ret = fs_mount_pending(fsid); + if (ret == 1) { gossip_debug(GOSSIP_DEV_DEBUG, - "Skipping op tag %llu %s\n", - llu(op->tag), - get_opname_string(op)); + "orangefs: skipping op tag %llu %s\n", + llu(op->tag), get_opname_string(op)); + spin_unlock(&op->lock); + continue; + /* Skip ops whose filesystem we don't know about unless + * it is being mounted. */ + /* XXX: is there a better way to detect this? */ + } else if (ret == -1 && + !(op->upcall.type == PVFS2_VFS_OP_FS_MOUNT || + op->upcall.type == PVFS2_VFS_OP_GETATTR)) { + gossip_debug(GOSSIP_DEV_DEBUG, + "orangefs: skipping op tag %llu %s\n", + llu(op->tag), get_opname_string(op)); + gossip_err( + "orangefs: ERROR: fs_mount_pending %d\n", + fsid); + spin_unlock(&op->lock); continue; - } else { - /* - * op does not belong to any particular fsid - * or already mounted.. let it through - */ - cur_op = op; - spin_lock(&cur_op->lock); - list_del(&cur_op->list); - spin_unlock(&cur_op->lock); - break; } } - spin_unlock(&pvfs2_request_list_lock); - } - - if (cur_op) { - spin_lock(&cur_op->lock); - - gossip_debug(GOSSIP_DEV_DEBUG, - "client-core: reading op tag %llu %s\n", - llu(cur_op->tag), get_opname_string(cur_op)); - if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) { - gossip_err("WARNING: Current op already queued...skipping\n"); - } else { - /* - * atomically move the operation to the - * htable_ops_in_progress - */ - set_op_state_inprogress(cur_op); - pvfs2_devreq_add_op(cur_op); - } - - spin_unlock(&cur_op->lock); - - /* Push the upcall out */ - len = MAX_ALIGNED_DEV_REQ_UPSIZE; - if ((size_t) len <= count) { - ret = copy_to_user(buf, - &proto_ver, - sizeof(__s32)); - if (ret == 0) { - ret = copy_to_user(buf + sizeof(__s32), - &magic, - sizeof(__s32)); - if (ret == 0) { - ret = copy_to_user(buf+2 * sizeof(__s32), - &cur_op->tag, - sizeof(__u64)); - if (ret == 0) { - ret = copy_to_user( - buf + - 2 * - sizeof(__s32) + - sizeof(__u64), - &cur_op->upcall, - sizeof(struct pvfs2_upcall_s)); - } - } - } - - if (ret) { - gossip_err("Failed to copy data to user space\n"); - len = -EFAULT; - } - } else { - gossip_err - ("Failed to copy data to user space\n"); - len = -EIO; - } - } else if (file->f_flags & O_NONBLOCK) { /* - * if in non-blocking mode, return EAGAIN since no requests are - * ready yet + * Either this op does not pertain to a filesystem, is mounting + * a filesystem, or pertains to a mounted filesystem. Let it + * through. */ - len = -EAGAIN; + cur_op = op; + break; } - return len; + + /* + * At this point we either have a valid op and can continue or have not + * found an op and must ask the client to try again later. + */ + if (!cur_op) { + spin_unlock(&pvfs2_request_list_lock); + return -EAGAIN; + } + + gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: reading op tag %llu %s\n", + llu(cur_op->tag), get_opname_string(cur_op)); + + /* + * Such an op should never be on the list in the first place. If so, we + * will abort. + */ + if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) { + gossip_err("orangefs: ERROR: Current op already queued.\n"); + list_del(&cur_op->list); + spin_unlock(&cur_op->lock); + spin_unlock(&pvfs2_request_list_lock); + return -EAGAIN; + } + + /* + * Set the operation to be in progress and move it between lists since + * it has been sent to the client. + */ + set_op_state_inprogress(cur_op); + + list_del(&cur_op->list); + spin_unlock(&pvfs2_request_list_lock); + pvfs2_devreq_add_op(cur_op); + spin_unlock(&cur_op->lock); + + /* Push the upcall out. */ + ret = copy_to_user(buf, &proto_ver, sizeof(__s32)); + if (ret != 0) + goto error; + ret = copy_to_user(buf+sizeof(__s32), &magic, sizeof(__s32)); + if (ret != 0) + goto error; + ret = copy_to_user(buf+2 * sizeof(__s32), &cur_op->tag, sizeof(__u64)); + if (ret != 0) + goto error; + ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall, + sizeof(struct pvfs2_upcall_s)); + if (ret != 0) + goto error; + + /* The client only asks to read one size buffer. */ + return MAX_ALIGNED_DEV_REQ_UPSIZE; +error: + /* + * We were unable to copy the op data to the client. Put the op back in + * list. If client has crashed, the op will be purged later when the + * device is released. + */ + gossip_err("orangefs: Failed to copy data to user space\n"); + spin_lock(&pvfs2_request_list_lock); + spin_lock(&cur_op->lock); + set_op_state_waiting(cur_op); + pvfs2_devreq_remove_op(cur_op->tag); + list_add(&cur_op->list, &pvfs2_request_list); + spin_unlock(&cur_op->lock); + spin_unlock(&pvfs2_request_list_lock); + return -EFAULT; } /* Function for writev() callers into the device */