From ad1633a151df9869a222bd99ba04643dc2e0052b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Jan 2017 22:35:31 -0500 Subject: [PATCH 01/14] namei: fold unlazy_link() into its sole caller Signed-off-by: Al Viro --- fs/namei.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index ad74877e1442..f08eca2b788b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -749,24 +749,6 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq return -ECHILD; } -static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq) -{ - if (unlikely(!legitimize_path(nd, link, seq))) { - drop_links(nd); - nd->depth = 0; - nd->flags &= ~LOOKUP_RCU; - nd->path.mnt = NULL; - nd->path.dentry = NULL; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); - } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) { - return 0; - } - path_put(link); - return -ECHILD; -} - static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { return dentry->d_op->d_revalidate(dentry, flags); @@ -1706,9 +1688,17 @@ static int pick_link(struct nameidata *nd, struct path *link, error = nd_alloc_stack(nd); if (unlikely(error)) { if (error == -ECHILD) { - if (unlikely(unlazy_link(nd, link, seq))) - return -ECHILD; - error = nd_alloc_stack(nd); + if (unlikely(!legitimize_path(nd, link, seq))) { + drop_links(nd); + nd->depth = 0; + nd->flags &= ~LOOKUP_RCU; + nd->path.mnt = NULL; + nd->path.dentry = NULL; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + rcu_read_unlock(); + } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) + error = nd_alloc_stack(nd); } if (error) { path_put(link); From 209a7fb2104f2724f651870306c65f86850ee953 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2017 01:35:39 -0500 Subject: [PATCH 02/14] lookup_fast(): clean up the logics around the fallback to non-rcu mode Signed-off-by: Al Viro --- fs/namei.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f08eca2b788b..dfe6e32aeec6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1558,12 +1558,7 @@ static int lookup_fast(struct nameidata *nd, *seqp = seq; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) status = d_revalidate(dentry, nd->flags); - if (unlikely(status <= 0)) { - if (unlazy_walk(nd, dentry, seq)) - return -ECHILD; - if (status == -ECHILD) - status = d_revalidate(dentry, nd->flags); - } else { + if (likely(status > 0)) { /* * Note: do negative dentry check after revalidation in * case that drops it. @@ -1574,9 +1569,12 @@ static int lookup_fast(struct nameidata *nd, path->dentry = dentry; if (likely(__follow_mount_rcu(nd, path, inode, seqp))) return 1; - if (unlazy_walk(nd, dentry, seq)) - return -ECHILD; } + if (unlazy_walk(nd, dentry, seq)) + return -ECHILD; + if (unlikely(status == -ECHILD)) + /* we'd been told to redo it in non-rcu mode */ + status = d_revalidate(dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) From a89f833737e6c75df0091ccf6c767b94745463c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2017 22:25:28 -0500 Subject: [PATCH 03/14] namei.c: fold the check for DCACHE_OP_REVALIDATE into d_revalidate() Signed-off-by: Al Viro --- fs/namei.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index dfe6e32aeec6..2ed2701a74b5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -751,7 +751,10 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { - return dentry->d_op->d_revalidate(dentry, flags); + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) + return dentry->d_op->d_revalidate(dentry, flags); + else + return 1; } /** @@ -1454,19 +1457,14 @@ static struct dentry *lookup_dcache(const struct qstr *name, struct dentry *dir, unsigned int flags) { - struct dentry *dentry; - int error; - - dentry = d_lookup(dir, name); + struct dentry *dentry = d_lookup(dir, name); if (dentry) { - if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, flags); - if (unlikely(error <= 0)) { - if (!error) - d_invalidate(dentry); - dput(dentry); - return ERR_PTR(error); - } + int error = d_revalidate(dentry, flags); + if (unlikely(error <= 0)) { + if (!error) + d_invalidate(dentry); + dput(dentry); + return ERR_PTR(error); } } return dentry; @@ -1556,8 +1554,7 @@ static int lookup_fast(struct nameidata *nd, return -ECHILD; *seqp = seq; - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(dentry, nd->flags); if (likely(status > 0)) { /* * Note: do negative dentry check after revalidation in @@ -1579,8 +1576,7 @@ static int lookup_fast(struct nameidata *nd, dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return 0; - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) @@ -1619,8 +1615,7 @@ static struct dentry *lookup_slow(const struct qstr *name, if (IS_ERR(dentry)) goto out; if (unlikely(!d_in_lookup(dentry))) { - if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && - !(flags & LOOKUP_NO_REVAL)) { + if (!(flags & LOOKUP_NO_REVAL)) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (!error) { @@ -3057,9 +3052,6 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (d_in_lookup(dentry)) break; - if (!(dentry->d_flags & DCACHE_OP_REVALIDATE)) - break; - error = d_revalidate(dentry, nd->flags); if (likely(error > 0)) break; From 4675ac39b5dd5ff08dd8cb2be9ddd3cba778aa39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2017 22:29:15 -0500 Subject: [PATCH 04/14] namei.c: split unlazy_walk() In all but one case, the last two arguments are NULL and 0 resp.; almost everyone just wants to switch nameidata to non-RCU mode. The only exception is lookup_fast(), where we have a child dentry we want to legitimize as well. Split these two cases. Signed-off-by: Al Viro --- fs/namei.c | 105 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 39 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 2ed2701a74b5..76cfeb641e97 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -672,52 +672,83 @@ static bool legitimize_links(struct nameidata *nd) /** * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data - * @dentry: child of nd->path.dentry or NULL - * @seq: seq number to check dentry against * Returns: 0 on success, -ECHILD on failure * - * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry - * for ref-walk mode. @dentry must be a path found by a do_lookup call on - * @nd or NULL. Must be called from rcu-walk context. + * unlazy_walk attempts to legitimize the current nd->path and nd->root + * for ref-walk mode. + * Must be called from rcu-walk context. * Nothing should touch nameidata between unlazy_walk() failure and * terminate_walk(). */ -static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq) +static int unlazy_walk(struct nameidata *nd) { struct dentry *parent = nd->path.dentry; BUG_ON(!(nd->flags & LOOKUP_RCU)); + nd->flags &= ~LOOKUP_RCU; + if (unlikely(!legitimize_links(nd))) + goto out2; + if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) + goto out1; + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { + if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) + goto out; + } + rcu_read_unlock(); + BUG_ON(nd->inode != parent->d_inode); + return 0; + +out2: + nd->path.mnt = NULL; + nd->path.dentry = NULL; +out1: + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; +out: + rcu_read_unlock(); + return -ECHILD; +} + +/** + * unlazy_child - try to switch to ref-walk mode. + * @nd: nameidata pathwalk data + * @dentry: child of nd->path.dentry + * @seq: seq number to check dentry against + * Returns: 0 on success, -ECHILD on failure + * + * unlazy_child attempts to legitimize the current nd->path, nd->root and dentry + * for ref-walk mode. @dentry must be a path found by a do_lookup call on + * @nd. Must be called from rcu-walk context. + * Nothing should touch nameidata between unlazy_child() failure and + * terminate_walk(). + */ +static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned seq) +{ + BUG_ON(!(nd->flags & LOOKUP_RCU)); + nd->flags &= ~LOOKUP_RCU; if (unlikely(!legitimize_links(nd))) goto out2; if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) goto out2; - if (unlikely(!lockref_get_not_dead(&parent->d_lockref))) + if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) goto out1; /* - * For a negative lookup, the lookup sequence point is the parents - * sequence point, and it only needs to revalidate the parent dentry. - * - * For a positive lookup, we need to move both the parent and the - * dentry from the RCU domain to be properly refcounted. And the - * sequence number in the dentry validates *both* dentry counters, - * since we checked the sequence number of the parent after we got - * the child sequence number. So we know the parent must still - * be valid if the child sequence number is still valid. + * We need to move both the parent and the dentry from the RCU domain + * to be properly refcounted. And the sequence number in the dentry + * validates *both* dentry counters, since we checked the sequence + * number of the parent after we got the child sequence number. So we + * know the parent must still be valid if the child sequence number is */ - if (!dentry) { - if (read_seqcount_retry(&parent->d_seq, nd->seq)) - goto out; - BUG_ON(nd->inode != parent->d_inode); - } else { - if (!lockref_get_not_dead(&dentry->d_lockref)) - goto out; - if (read_seqcount_retry(&dentry->d_seq, seq)) - goto drop_dentry; + if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) + goto out; + if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) { + rcu_read_unlock(); + dput(dentry); + goto drop_root_mnt; } - /* * Sequence counts matched. Now make sure that the root is * still valid and get it if required. @@ -733,10 +764,6 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq rcu_read_unlock(); return 0; -drop_dentry: - rcu_read_unlock(); - dput(dentry); - goto drop_root_mnt; out2: nd->path.mnt = NULL; out1: @@ -775,7 +802,7 @@ static int complete_walk(struct nameidata *nd) if (nd->flags & LOOKUP_RCU) { if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return -ECHILD; } @@ -1001,7 +1028,7 @@ const char *get_link(struct nameidata *nd) touch_atime(&last->link); cond_resched(); } else if (atime_needs_update_rcu(&last->link, inode)) { - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return ERR_PTR(-ECHILD); touch_atime(&last->link); } @@ -1020,7 +1047,7 @@ const char *get_link(struct nameidata *nd) if (nd->flags & LOOKUP_RCU) { res = get(NULL, inode, &last->done); if (res == ERR_PTR(-ECHILD)) { - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return ERR_PTR(-ECHILD); res = get(dentry, inode, &last->done); } @@ -1529,7 +1556,7 @@ static int lookup_fast(struct nameidata *nd, bool negative; dentry = __d_lookup_rcu(parent, &nd->last, &seq); if (unlikely(!dentry)) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; return 0; } @@ -1567,7 +1594,7 @@ static int lookup_fast(struct nameidata *nd, if (likely(__follow_mount_rcu(nd, path, inode, seqp))) return 1; } - if (unlazy_walk(nd, dentry, seq)) + if (unlazy_child(nd, dentry, seq)) return -ECHILD; if (unlikely(status == -ECHILD)) /* we'd been told to redo it in non-rcu mode */ @@ -1646,7 +1673,7 @@ static inline int may_lookup(struct nameidata *nd) int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD) return err; - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } return inode_permission(nd->inode, MAY_EXEC); @@ -1690,7 +1717,7 @@ static int pick_link(struct nameidata *nd, struct path *link, if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) + } else if (likely(unlazy_walk(nd)) == 0) error = nd_alloc_stack(nd); } if (error) { @@ -2108,7 +2135,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } return -ENOTDIR; @@ -2565,7 +2592,7 @@ mountpoint_last(struct nameidata *nd) /* If we're in rcuwalk, drop out of it to handle last component */ if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } From af7bd4dc13093bf1477f370722bbab24cf457b91 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:52 +0200 Subject: [PATCH 05/14] vfs: create vfs helper vfs_tmpfile() Factor out some common vfs bits from do_tmpfile() to be used by overlayfs for concurrent copy up. Signed-off-by: Amir Goldstein Cc: Al Viro Signed-off-by: Miklos Szeredi --- fs/namei.c | 68 +++++++++++++++++++++++++++++----------------- include/linux/fs.h | 3 ++ 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index ad74877e1442..7d87699c3e2e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3353,13 +3353,50 @@ static int do_last(struct nameidata *nd, return error; } +struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag) +{ + static const struct qstr name = QSTR_INIT("/", 1); + struct dentry *child = NULL; + struct inode *dir = dentry->d_inode; + struct inode *inode; + int error; + + /* we want directory to be writable */ + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + if (error) + goto out_err; + error = -EOPNOTSUPP; + if (!dir->i_op->tmpfile) + goto out_err; + error = -ENOMEM; + child = d_alloc(dentry, &name); + if (unlikely(!child)) + goto out_err; + error = dir->i_op->tmpfile(dir, child, mode); + if (error) + goto out_err; + error = -ENOENT; + inode = child->d_inode; + if (unlikely(!inode)) + goto out_err; + if (!(open_flag & O_EXCL)) { + spin_lock(&inode->i_lock); + inode->i_state |= I_LINKABLE; + spin_unlock(&inode->i_lock); + } + return child; + +out_err: + dput(child); + return ERR_PTR(error); +} +EXPORT_SYMBOL(vfs_tmpfile); + static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file, int *opened) { - static const struct qstr name = QSTR_INIT("/", 1); struct dentry *child; - struct inode *dir; struct path path; int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) @@ -3367,25 +3404,12 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; - dir = path.dentry->d_inode; - /* we want directory to be writable */ - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); - if (error) + child = vfs_tmpfile(path.dentry, op->mode, op->open_flag); + error = PTR_ERR(child); + if (unlikely(IS_ERR(child))) goto out2; - if (!dir->i_op->tmpfile) { - error = -EOPNOTSUPP; - goto out2; - } - child = d_alloc(path.dentry, &name); - if (unlikely(!child)) { - error = -ENOMEM; - goto out2; - } dput(path.dentry); path.dentry = child; - error = dir->i_op->tmpfile(dir, child, op->mode); - if (error) - goto out2; audit_inode(nd->name, child, 0); /* Don't check for other permissions, the inode was just created */ error = may_open(&path, 0, op->open_flag); @@ -3396,14 +3420,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, if (error) goto out2; error = open_check_o_direct(file); - if (error) { + if (error) fput(file); - } else if (!(op->open_flag & O_EXCL)) { - struct inode *inode = file_inode(file); - spin_lock(&inode->i_lock); - inode->i_state |= I_LINKABLE; - spin_unlock(&inode->i_lock); - } out2: mnt_drop_write(path.mnt); out: diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ba074328894..4a7f3cc9edab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1561,6 +1561,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); +extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, + int open_flag); + /* * VFS file helper functions. */ From 9e79b1326302589a035fe20e8cce7c1a7d8333ed Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:55 +0200 Subject: [PATCH 06/14] vfs: deny fallocate() on directory There was an obscure use case of fallocate of directory inode in the vfs helper with the comment: "Let individual file system decide if it supports preallocation for directories or not." But there is no in-tree file system that implements fallocate for directory operations. Deny an attempt to fallocate a directory with EISDIR error. This change is needed prior to converting sb_start_write() to file_start_write(), so freeze protection is correctly handled for cases of fallocate file and blockdev. Cc: linux-api@vger.kernel.org Cc: Al Viro Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/open.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/open.c b/fs/open.c index 9921f70bc5ca..f9118f4113e7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -301,12 +301,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (S_ISFIFO(inode->i_mode)) return -ESPIPE; - /* - * Let individual file system decide if it supports preallocation - * for directories or not. - */ - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && - !S_ISBLK(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) + return -EISDIR; + + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) return -ENODEV; /* Check for wrap through zero too */ From 11cbfb10775aa2a01cee966d118049ede9d0bdf2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:56 +0200 Subject: [PATCH 07/14] vfs: deny copy_file_range() for non regular files There is no in-tree file system that implements copy_file_range() for non regular files. Deny an attempt to copy_file_range() a directory with EISDIR and any other non regualr file with EINVAL to conform with behavior of vfs_{clone,dedup}_file_range(). This change is needed prior to converting sb_start_write() to file_start_write() in the vfs helper. Cc: linux-api@vger.kernel.org Cc: Al Viro Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/read_write.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 5816d4c4cab0..511178d7723b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1518,6 +1518,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (flags != 0) return -EINVAL; + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) return ret; From bfe219d373cadab761373aeea4c70406bc27ea2c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:57 +0200 Subject: [PATCH 08/14] vfs: wrap write f_ops with file_{start,end}_write() Before calling write f_ops, call file_start_write() instead of sb_start_write(). Replace {sb,file}_start_write() for {copy,clone}_file_range() and for fallocate(). Beyond correct semantics, this avoids freeze protection to sb when operating on special inodes, such as fallocate() on a blockdev. Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/open.c | 4 ++-- fs/read_write.c | 4 ++-- include/linux/fs.h | 26 +++++++++++++------------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/open.c b/fs/open.c index f9118f4113e7..949cef29c3bb 100644 --- a/fs/open.c +++ b/fs/open.c @@ -314,7 +314,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!file->f_op->fallocate) return -EOPNOTSUPP; - sb_start_write(inode->i_sb); + file_start_write(file); ret = file->f_op->fallocate(file, mode, offset, len); /* @@ -327,7 +327,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (ret == 0) fsnotify_modify(file); - sb_end_write(inode->i_sb); + file_end_write(file); return ret; } EXPORT_SYMBOL_GPL(vfs_fallocate); diff --git a/fs/read_write.c b/fs/read_write.c index 511178d7723b..820a3f06c46a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1543,7 +1543,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (len == 0) return 0; - sb_start_write(inode_out->i_sb); + file_start_write(file_out); /* * Try cloning first, this is supported by more file systems, and @@ -1579,7 +1579,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, inc_syscr(current); inc_syscw(current); - sb_end_write(inode_out->i_sb); + file_end_write(file_out); return ret; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a7f3cc9edab..78c81e6f5d76 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1741,19 +1741,6 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - sb_start_write(file_inode(file_out)->i_sb); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - sb_end_write(file_inode(file_out)->i_sb); - - return ret; -} - struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); @@ -2564,6 +2551,19 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } +static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len) +{ + int ret; + + file_start_write(file_out); + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} + /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. From 7687a7a4435f4b32d8e775a7b6b25aea3a25d25a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 09/14] vfs: extract common parts of {compat_,}do_readv_writev() Signed-off-by: Miklos Szeredi --- fs/read_write.c | 80 ++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 820a3f06c46a..12a216021f87 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -834,25 +834,15 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, return ret; } -static ssize_t do_readv_writev(int type, struct file *file, - const struct iovec __user * uvector, - unsigned long nr_segs, loff_t *pos, - int flags) +static ssize_t __do_readv_writev(int type, struct file *file, + struct iov_iter *iter, loff_t *pos, int flags) { size_t tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; - ssize_t ret; + ssize_t ret = 0; io_fn_t fn; iter_fn_t iter_fn; - ret = import_iovec(type, uvector, nr_segs, - ARRAY_SIZE(iovstack), &iov, &iter); - if (ret < 0) - return ret; - - tot_len = iov_iter_count(&iter); + tot_len = iov_iter_count(iter); if (!tot_len) goto out; ret = rw_verify_area(type, file, pos, tot_len); @@ -869,15 +859,14 @@ static ssize_t do_readv_writev(int type, struct file *file, } if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); + ret = do_iter_readv_writev(file, iter, pos, iter_fn, flags); else - ret = do_loop_readv_writev(file, &iter, pos, fn, flags); + ret = do_loop_readv_writev(file, iter, pos, fn, flags); if (type != READ) file_end_write(file); out: - kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); @@ -887,6 +876,27 @@ static ssize_t do_readv_writev(int type, struct file *file, return ret; } +static ssize_t do_readv_writev(int type, struct file *file, + const struct iovec __user *uvector, + unsigned long nr_segs, loff_t *pos, + int flags) +{ + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + ssize_t ret; + + ret = import_iovec(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; + + ret = __do_readv_writev(type, file, &iter, pos, flags); + kfree(iov); + + return ret; +} + ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, int flags) { @@ -1064,51 +1074,19 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, unsigned long nr_segs, loff_t *pos, int flags) { - compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; ssize_t ret; - io_fn_t fn; - iter_fn_t iter_fn; ret = compat_import_iovec(type, uvector, nr_segs, UIO_FASTIOV, &iov, &iter); if (ret < 0) return ret; - tot_len = iov_iter_count(&iter); - if (!tot_len) - goto out; - ret = rw_verify_area(type, file, pos, tot_len); - if (ret < 0) - goto out; - - if (type == READ) { - fn = file->f_op->read; - iter_fn = file->f_op->read_iter; - } else { - fn = (io_fn_t)file->f_op->write; - iter_fn = file->f_op->write_iter; - file_start_write(file); - } - - if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); - else - ret = do_loop_readv_writev(file, &iter, pos, fn, flags); - - if (type != READ) - file_end_write(file); - -out: + ret = __do_readv_writev(type, file, &iter, pos, flags); kfree(iov); - if ((ret + (type == READ)) > 0) { - if (type == READ) - fsnotify_access(file); - else - fsnotify_modify(file); - } + return ret; } From 0f78d06ac1e9b470cbd8f913ee1688c8b2c8feb3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 10/14] vfs: pass type instead of fn to do_{loop,iter}_readv_writev() Signed-off-by: Miklos Szeredi --- fs/read_write.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 12a216021f87..198614f757fa 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -23,9 +23,6 @@ #include #include -typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); -typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); - const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, @@ -675,7 +672,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) EXPORT_SYMBOL(iov_shorten); static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, iter_fn_t fn, int flags) + loff_t *ppos, int type, int flags) { struct kiocb kiocb; ssize_t ret; @@ -692,7 +689,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); kiocb.ki_pos = *ppos; - ret = fn(&kiocb, iter); + if (type == READ) + ret = filp->f_op->read_iter(&kiocb, iter); + else + ret = filp->f_op->write_iter(&kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; @@ -700,7 +700,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, /* Do it by hand, with file-ops */ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, io_fn_t fn, int flags) + loff_t *ppos, int type, int flags) { ssize_t ret = 0; @@ -711,7 +711,13 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, struct iovec iovec = iov_iter_iovec(iter); ssize_t nr; - nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); + if (type == READ) { + nr = filp->f_op->read(filp, iovec.iov_base, + iovec.iov_len, ppos); + } else { + nr = filp->f_op->write(filp, iovec.iov_base, + iovec.iov_len, ppos); + } if (nr < 0) { if (!ret) @@ -839,8 +845,6 @@ static ssize_t __do_readv_writev(int type, struct file *file, { size_t tot_len; ssize_t ret = 0; - io_fn_t fn; - iter_fn_t iter_fn; tot_len = iov_iter_count(iter); if (!tot_len) @@ -849,19 +853,14 @@ static ssize_t __do_readv_writev(int type, struct file *file, if (ret < 0) goto out; - if (type == READ) { - fn = file->f_op->read; - iter_fn = file->f_op->read_iter; - } else { - fn = (io_fn_t)file->f_op->write; - iter_fn = file->f_op->write_iter; + if (type != READ) file_start_write(file); - } - if (iter_fn) - ret = do_iter_readv_writev(file, iter, pos, iter_fn, flags); + if ((type == READ && file->f_op->read_iter) || + (type == WRITE && file->f_op->write_iter)) + ret = do_iter_readv_writev(file, iter, pos, type, flags); else - ret = do_loop_readv_writev(file, iter, pos, fn, flags); + ret = do_loop_readv_writev(file, iter, pos, type, flags); if (type != READ) file_end_write(file); From bb7462b6fd64e40809a857223bf7f0e628969f87 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 11/14] vfs: use helpers for calling f_op->{read,write}_iter() Signed-off-by: Miklos Szeredi --- drivers/block/loop.c | 4 ++-- fs/aio.c | 4 ++-- fs/read_write.c | 12 ++++++------ fs/splice.c | 2 +- include/linux/fs.h | 12 ++++++++++++ 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f347285c67ec..2cf2903a0715 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -501,9 +501,9 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_flags = IOCB_DIRECT; if (rw == WRITE) - ret = file->f_op->write_iter(&cmd->iocb, &iter); + ret = call_write_iter(file, &cmd->iocb, &iter); else - ret = file->f_op->read_iter(&cmd->iocb, &iter); + ret = call_read_iter(file, &cmd->iocb, &iter); if (ret != -EIOCBQUEUED) cmd->iocb.ki_complete(&cmd->iocb, ret, 0); diff --git a/fs/aio.c b/fs/aio.c index 4ab67e8cb776..9f4a9a2e7ae4 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1494,7 +1494,7 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, return ret; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) - ret = aio_ret(req, file->f_op->read_iter(req, &iter)); + ret = aio_ret(req, call_read_iter(file, req, &iter)); kfree(iovec); return ret; } @@ -1519,7 +1519,7 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, if (!ret) { req->ki_flags |= IOCB_WRITE; file_start_write(file); - ret = aio_ret(req, file->f_op->write_iter(req, &iter)); + ret = aio_ret(req, call_write_iter(file, req, &iter)); /* * We release freeze protection in aio_complete(). Fool lockdep * by telling it the lock got released so that it doesn't diff --git a/fs/read_write.c b/fs/read_write.c index 198614f757fa..f2ed9fdc98fd 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -367,7 +367,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) kiocb.ki_pos = *ppos; iter->type |= READ; - ret = file->f_op->read_iter(&kiocb, iter); + ret = call_read_iter(file, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -387,7 +387,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) kiocb.ki_pos = *ppos; iter->type |= WRITE; - ret = file->f_op->write_iter(&kiocb, iter); + ret = call_write_iter(file, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -436,7 +436,7 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo kiocb.ki_pos = *ppos; iov_iter_init(&iter, READ, &iov, 1, len); - ret = filp->f_op->read_iter(&kiocb, &iter); + ret = call_read_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; @@ -493,7 +493,7 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t kiocb.ki_pos = *ppos; iov_iter_init(&iter, WRITE, &iov, 1, len); - ret = filp->f_op->write_iter(&kiocb, &iter); + ret = call_write_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -690,9 +690,9 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, kiocb.ki_pos = *ppos; if (type == READ) - ret = filp->f_op->read_iter(&kiocb, iter); + ret = call_read_iter(filp, &kiocb, iter); else - ret = filp->f_op->write_iter(&kiocb, iter); + ret = call_write_iter(filp, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; diff --git a/fs/splice.c b/fs/splice.c index 873d83104e79..6518f058bd7f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -306,7 +306,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, idx = to.idx; init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; - ret = in->f_op->read_iter(&kiocb, &to); + ret = call_read_iter(in, &kiocb, &to); if (ret > 0) { *ppos = kiocb.ki_pos; file_accessed(in); diff --git a/include/linux/fs.h b/include/linux/fs.h index 78c81e6f5d76..a3145cdff8f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1715,6 +1715,18 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); } ____cacheline_aligned; +static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->read_iter(kio, iter); +} + +static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->write_iter(kio, iter); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, From f74ac01520c9f6d89bbc3c6931a72f757b742f86 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 12/14] mm: use helper for calling f_op->mmap() Signed-off-by: Miklos Szeredi --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- drivers/gpu/drm/vgem/vgem_drv.c | 2 +- fs/coda/file.c | 2 +- include/linux/fs.h | 5 +++++ ipc/shm.c | 2 +- mm/mmap.c | 2 +- mm/nommu.c | 4 ++-- 7 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 5e38299b5df6..84fc9f001576 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -141,7 +141,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * if (!obj->base.filp) return -ENODEV; - ret = obj->base.filp->f_op->mmap(obj->base.filp, vma); + ret = call_mmap(obj->base.filp, vma); if (ret) return ret; diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 477e07f0ecb6..9f7e222b3e89 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -287,7 +287,7 @@ static int vgem_prime_mmap(struct drm_gem_object *obj, if (!obj->filp) return -ENODEV; - ret = obj->filp->f_op->mmap(obj->filp, vma); + ret = call_mmap(obj->filp, vma); if (ret) return ret; diff --git a/fs/coda/file.c b/fs/coda/file.c index 6e0154eb6fcc..9d956cd6d46f 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -96,7 +96,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) cfi->cfi_mapcount++; spin_unlock(&cii->c_lock); - return host_file->f_op->mmap(host_file, vma); + return call_mmap(host_file, vma); } int coda_open(struct inode *coda_inode, struct file *coda_file) diff --git a/include/linux/fs.h b/include/linux/fs.h index a3145cdff8f2..93691b59e476 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1727,6 +1727,11 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, return file->f_op->write_iter(kio, iter); } +static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +{ + return file->f_op->mmap(file, vma); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, diff --git a/ipc/shm.c b/ipc/shm.c index 81203e8ba013..4329fe3ef594 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -423,7 +423,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) if (ret) return ret; - ret = sfd->file->f_op->mmap(sfd->file, vma); + ret = call_mmap(sfd->file, vma); if (ret) { shm_close(vma); return ret; diff --git a/mm/mmap.c b/mm/mmap.c index dc4291dcc99b..3714aa4e6f81 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1668,7 +1668,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * new file must not have been exposed to user-space, yet. */ vma->vm_file = get_file(file); - error = file->f_op->mmap(file, vma); + error = call_mmap(file, vma); if (error) goto unmap_and_free_vma; diff --git a/mm/nommu.c b/mm/nommu.c index 24f9f5f39145..e366354f777d 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1084,7 +1084,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) { int ret; - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + ret = call_mmap(vma->vm_file, vma); if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; return 0; @@ -1115,7 +1115,7 @@ static int do_mmap_private(struct vm_area_struct *vma, * - VM_MAYSHARE will be set if it may attempt to share */ if (capabilities & NOMMU_MAP_DIRECT) { - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + ret = call_mmap(vma->vm_file, vma); if (ret == 0) { /* shouldn't return success if we're not sharing */ BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); From 0eb8af4916a540c362a2950e5ab54eca32eb7d58 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 13/14] vfs: use helper for calling f_op->fsync() Signed-off-by: Miklos Szeredi --- fs/sync.c | 2 +- include/linux/fs.h | 6 ++++++ ipc/shm.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index 2a54c1f22035..11ba023434b1 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -192,7 +192,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) spin_unlock(&inode->i_lock); mark_inode_dirty_sync(inode); } - return file->f_op->fsync(file, start, end, datasync); + return call_fsync(file, start, end, datasync); } EXPORT_SYMBOL(vfs_fsync_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index 93691b59e476..72a33007ec24 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1732,6 +1732,12 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma) return file->f_op->mmap(file, vma); } +static inline int call_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + return file->f_op->fsync(file, start, end, datasync); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, diff --git a/ipc/shm.c b/ipc/shm.c index 4329fe3ef594..258aff2e03bb 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -452,7 +452,7 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (!sfd->file->f_op->fsync) return -EINVAL; - return sfd->file->f_op->fsync(sfd->file, start, end, datasync); + return call_fsync(sfd->file, start, end, datasync); } static long shm_fallocate(struct file *file, int mode, loff_t offset, From 0695d7dc1d9f19b82ec2cae24856bddce278cfe6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Feb 2017 16:43:36 +0100 Subject: [PATCH 14/14] orangefs: Use RCU for destroy_inode freeing of inodes must be RCU-delayed on all filesystems Cc: stable@vger.kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Al Viro --- fs/orangefs/super.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index c48859f16e7b..67c24351a67f 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -115,6 +115,13 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) return &orangefs_inode->vfs_inode; } +static void orangefs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + kmem_cache_free(orangefs_inode_cache, orangefs_inode); +} + static void orangefs_destroy_inode(struct inode *inode) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); @@ -123,7 +130,7 @@ static void orangefs_destroy_inode(struct inode *inode) "%s: deallocated %p destroying inode %pU\n", __func__, orangefs_inode, get_khandle_from_ino(inode)); - kmem_cache_free(orangefs_inode_cache, orangefs_inode); + call_rcu(&inode->i_rcu, orangefs_i_callback); } /*