From 31747eda41ef3c30c09c5c096b380bf54013746a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 14 Jan 2018 18:35:40 +0200 Subject: [PATCH 01/51] ovl: hash directory inodes for fsnotify fsnotify pins a watched directory inode in cache, but if directory dentry is released, new lookup will allocate a new dentry and a new inode. Directory events will be notified on the new inode, while fsnotify listener is watching the old pinned inode. Hash all directory inodes to reuse the pinned inode on lookup. Pure upper dirs are hashes by real upper inode, merge and lower dirs are hashed by real lower inode. The reference to lower inode was being held by the lower dentry object in the overlay dentry (oe->lowerstack[0]). Releasing the overlay dentry may drop lower inode refcount to zero. Add a refcount on behalf of the overlay inode to prevent that. As a by-product, hashing directory inodes also detects multiple redirected dirs to the same lower dir and uncovered redirected dir target on and returns -ESTALE on lookup. The reported issue dates back to initial version of overlayfs, but this patch depends on ovl_inode code that was introduced in kernel v4.13. Cc: #v4.13 Reported-by: Niklas Cassel Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Tested-by: Niklas Cassel --- fs/overlayfs/inode.c | 39 ++++++++++++++++++++++++++++----------- fs/overlayfs/super.c | 1 + fs/overlayfs/util.c | 4 ++-- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 00b6b294272a..94d2f8a8b779 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -606,6 +606,16 @@ static int ovl_inode_set(struct inode *inode, void *data) static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, struct dentry *upperdentry) { + if (S_ISDIR(inode->i_mode)) { + /* Real lower dir moved to upper layer under us? */ + if (!lowerdentry && ovl_inode_lower(inode)) + return false; + + /* Lookup of an uncovered redirect origin? */ + if (!upperdentry && ovl_inode_upper(inode)) + return false; + } + /* * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. * This happens when finding a copied up overlay inode for a renamed @@ -633,6 +643,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, struct inode *inode; /* Already indexed or could be indexed on copy up? */ bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); + struct dentry *origin = indexed ? lowerdentry : NULL; + bool is_dir; if (WARN_ON(upperdentry && indexed && !lowerdentry)) return ERR_PTR(-EIO); @@ -641,15 +653,19 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, realinode = d_inode(lowerdentry); /* - * Copy up origin (lower) may exist for non-indexed upper, but we must - * not use lower as hash key in that case. - * Hash inodes that are or could be indexed by origin inode and - * non-indexed upper inodes that could be hard linked by upper inode. + * Copy up origin (lower) may exist for non-indexed non-dir upper, but + * we must not use lower as hash key in that case. + * Hash non-dir that is or could be indexed by origin inode. + * Hash dir that is or could be merged by origin inode. + * Hash pure upper and non-indexed non-dir by upper inode. */ - if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { - struct inode *key = d_inode(indexed ? lowerdentry : - upperdentry); - unsigned int nlink; + is_dir = S_ISDIR(realinode->i_mode); + if (is_dir) + origin = lowerdentry; + + if (upperdentry || origin) { + struct inode *key = d_inode(origin ?: upperdentry); + unsigned int nlink = is_dir ? 1 : realinode->i_nlink; inode = iget5_locked(dentry->d_sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key); @@ -670,8 +686,9 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, goto out; } - nlink = ovl_get_nlink(lowerdentry, upperdentry, - realinode->i_nlink); + /* Recalculate nlink for non-dir due to indexing */ + if (!is_dir) + nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { inode = new_inode(dentry->d_sb); @@ -685,7 +702,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, ovl_set_flag(OVL_IMPURE, inode); /* Check for non-merge dir that may have whiteouts */ - if (S_ISDIR(realinode->i_mode)) { + if (is_dir) { struct ovl_entry *oe = dentry->d_fsdata; if (((upperdentry && lowerdentry) || oe->numlower > 1) || diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 76440feb79f6..1a436fa92a04 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -211,6 +211,7 @@ static void ovl_destroy_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode); dput(oi->__upperdentry); + iput(oi->lower); kfree(oi->redirect); ovl_dir_cache_free(inode); mutex_destroy(&oi->lock); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index d6bb1c9f5e7a..06119f34a69d 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -257,7 +257,7 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, if (upperdentry) OVL_I(inode)->__upperdentry = upperdentry; if (lowerdentry) - OVL_I(inode)->lower = d_inode(lowerdentry); + OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode); } @@ -273,7 +273,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) */ smp_wmb(); OVL_I(inode)->__upperdentry = upperdentry; - if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) { + if (inode_unhashed(inode)) { inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); } From d796e77f1dd541fe34481af2eee6454688d13982 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 8 Nov 2017 09:39:46 +0200 Subject: [PATCH 02/51] ovl: fix failure to fsync lower dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a writable mount, it is not expected for overlayfs to return EINVAL/EROFS for fsync, even if dir/file is not changed. This commit fixes the case of fsync of directory, which is easier to address, because overlayfs already implements fsync file operation for directories. The problem reported by Raphael is that new PostgreSQL 10.0 with a database in overlayfs where lower layer in squashfs fails to start. The failure is due to fsync error, when PostgreSQL does fsync on all existing db directories on startup and a specific directory exists lower layer with no changes. Reported-by: Raphael Hertzog Cc: # v3.18 Signed-off-by: Amir Goldstein Tested-by: Raphaƫl Hertzog Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 8c98578d27a1..a7e45e6cd732 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -769,10 +769,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, struct dentry *dentry = file->f_path.dentry; struct file *realfile = od->realfile; + /* Nothing to sync for lower */ + if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) + return 0; + /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + if (!od->is_upper) { struct inode *inode = file_inode(file); realfile = READ_ONCE(od->upperfile); From 6d0a8a90a5bbfd6befcb512fad6618608e8c0e86 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 10 Nov 2017 13:18:07 +0200 Subject: [PATCH 03/51] ovl: take lower dir inode mutex outside upper sb_writers lock The functions ovl_lower_positive() and ovl_check_empty_dir() both take inode mutex on the real lower dir under ovl_want_write() which takes the upper_mnt sb_writers lock. While this is not a clear locking order or layering violation, it creates an undesired lock dependency between two unrelated layers for no good reason. This lock dependency materializes to a false(?) positive lockdep warning when calling rmdir() on a nested overlayfs, where both nested and underlying overlayfs both use the same fs type as upper layer. rmdir() on the nested overlayfs creates the lock chain: sb_writers of upper_mnt (e.g. tmpfs) in ovl_do_remove() ovl_i_mutex_dir_key[] of lower overlay dir in ovl_lower_positive() rmdir() on the underlying overlayfs creates the lock chain in reverse order: ovl_i_mutex_dir_key[] of lower overlay dir in vfs_rmdir() sb_writers of nested upper_mnt (e.g. tmpfs) in ovl_do_remove() To rid of the unneeded locking dependency, move both ovl_lower_positive() and ovl_check_empty_dir() to before ovl_want_write() in rmdir() and rename() implementation. This change spreads the pieces of ovl_check_empty_and_clear() directly inside the rmdir()/rename() implementations so the helper is no longer needed and removed. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 117 ++++++++++++++++++----------------------- fs/overlayfs/namei.c | 3 ++ fs/overlayfs/readdir.c | 3 ++ 3 files changed, 58 insertions(+), 65 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f9788bc116a8..a1a7606d4891 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -181,11 +181,6 @@ static bool ovl_type_origin(struct dentry *dentry) return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); } -static bool ovl_may_have_whiteouts(struct dentry *dentry) -{ - return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry)); -} - static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct cattr *attr, struct dentry *hardlink) { @@ -301,37 +296,6 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, return ERR_PTR(err); } -static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) -{ - int err; - struct dentry *ret = NULL; - LIST_HEAD(list); - - err = ovl_check_empty_dir(dentry, &list); - if (err) { - ret = ERR_PTR(err); - goto out_free; - } - - /* - * When removing an empty opaque directory, then it makes no sense to - * replace it with an exact replica of itself. - * - * If upperdentry has whiteouts, clear them. - * - * Can race with copy-up, since we don't hold the upperdir mutex. - * Doesn't matter, since copy-up can't create a non-empty directory - * from an empty one. - */ - if (!list_empty(&list)) - ret = ovl_clear_empty(dentry, &list); - -out_free: - ovl_cache_free(&list); - - return ret; -} - static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name, const struct posix_acl *acl) { @@ -623,7 +587,8 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper) return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper); } -static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) +static int ovl_remove_and_whiteout(struct dentry *dentry, + struct list_head *list) { struct dentry *workdir = ovl_workdir(dentry); struct inode *wdir = workdir->d_inode; @@ -638,8 +603,8 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) if (WARN_ON(!workdir)) return -EROFS; - if (is_dir) { - opaquedir = ovl_check_empty_and_clear(dentry); + if (!list_empty(list)) { + opaquedir = ovl_clear_empty(dentry, list); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -694,7 +659,8 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) goto out_d_drop; } -static int ovl_remove_upper(struct dentry *dentry, bool is_dir) +static int ovl_remove_upper(struct dentry *dentry, bool is_dir, + struct list_head *list) { struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *dir = upperdir->d_inode; @@ -702,10 +668,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) struct dentry *opaquedir = NULL; int err; - /* Redirect/origin dir can be !ovl_lower_positive && not clean */ - if (is_dir && (ovl_dentry_get_redirect(dentry) || - ovl_may_have_whiteouts(dentry))) { - opaquedir = ovl_check_empty_and_clear(dentry); + if (!list_empty(list)) { + opaquedir = ovl_clear_empty(dentry, list); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -746,11 +710,26 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) return err; } +static bool ovl_pure_upper(struct dentry *dentry) +{ + return !ovl_dentry_lower(dentry) && + !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry)); +} + static int ovl_do_remove(struct dentry *dentry, bool is_dir) { int err; bool locked = false; const struct cred *old_cred; + bool lower_positive = ovl_lower_positive(dentry); + LIST_HEAD(list); + + /* No need to clean pure upper removed by vfs_rmdir() */ + if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) { + err = ovl_check_empty_dir(dentry, &list); + if (err) + goto out; + } err = ovl_want_write(dentry); if (err) @@ -765,10 +744,10 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) goto out_drop_write; old_cred = ovl_override_creds(dentry->d_sb); - if (!ovl_lower_positive(dentry)) - err = ovl_remove_upper(dentry, is_dir); + if (!lower_positive) + err = ovl_remove_upper(dentry, is_dir, &list); else - err = ovl_remove_and_whiteout(dentry, is_dir); + err = ovl_remove_and_whiteout(dentry, &list); revert_creds(old_cred); if (!err) { if (is_dir) @@ -780,6 +759,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) out_drop_write: ovl_drop_write(dentry); out: + ovl_cache_free(&list); return err; } @@ -915,6 +895,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, bool samedir = olddir == newdir; struct dentry *opaquedir = NULL; const struct cred *old_cred = NULL; + LIST_HEAD(list); err = -EINVAL; if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) @@ -929,6 +910,27 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (!overwrite && !ovl_can_move(new)) goto out; + if (overwrite && new_is_dir && !ovl_pure_upper(new)) { + err = ovl_check_empty_dir(new, &list); + if (err) + goto out; + } + + if (overwrite) { + if (ovl_lower_positive(old)) { + if (!ovl_dentry_is_whiteout(new)) { + /* Whiteout source */ + flags |= RENAME_WHITEOUT; + } else { + /* Switch whiteouts */ + flags |= RENAME_EXCHANGE; + } + } else if (is_dir && ovl_dentry_is_whiteout(new)) { + flags |= RENAME_EXCHANGE; + cleanup_whiteout = true; + } + } + err = ovl_want_write(old); if (err) goto out; @@ -952,9 +954,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, old_cred = ovl_override_creds(old->d_sb); - if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) || - ovl_may_have_whiteouts(new))) { - opaquedir = ovl_check_empty_and_clear(new); + if (!list_empty(&list)) { + opaquedir = ovl_clear_empty(new, &list); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { opaquedir = NULL; @@ -962,21 +963,6 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, } } - if (overwrite) { - if (ovl_lower_positive(old)) { - if (!ovl_dentry_is_whiteout(new)) { - /* Whiteout source */ - flags |= RENAME_WHITEOUT; - } else { - /* Switch whiteouts */ - flags |= RENAME_EXCHANGE; - } - } else if (is_dir && ovl_dentry_is_whiteout(new)) { - flags |= RENAME_EXCHANGE; - cleanup_whiteout = true; - } - } - old_upperdir = ovl_dentry_upper(old->d_parent); new_upperdir = ovl_dentry_upper(new->d_parent); @@ -1094,6 +1080,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, ovl_drop_write(old); out: dput(opaquedir); + ovl_cache_free(&list); return err; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index beb945e1963c..926248e1de04 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -774,6 +774,7 @@ bool ovl_lower_positive(struct dentry *dentry) struct ovl_entry *oe = dentry->d_fsdata; struct ovl_entry *poe = dentry->d_parent->d_fsdata; const struct qstr *name = &dentry->d_name; + const struct cred *old_cred; unsigned int i; bool positive = false; bool done = false; @@ -789,6 +790,7 @@ bool ovl_lower_positive(struct dentry *dentry) if (!ovl_dentry_upper(dentry)) return true; + old_cred = ovl_override_creds(dentry->d_sb); /* Positive upper -> have to look up lower to see whether it exists */ for (i = 0; !done && !positive && i < poe->numlower; i++) { struct dentry *this; @@ -818,6 +820,7 @@ bool ovl_lower_positive(struct dentry *dentry) dput(this); } } + revert_creds(old_cred); return positive; } diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index a7e45e6cd732..7dfe381c2cd8 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -862,8 +862,11 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) int err; struct ovl_cache_entry *p, *n; struct rb_root root = RB_ROOT; + const struct cred *old_cred; + old_cred = ovl_override_creds(dentry->d_sb); err = ovl_dir_read_merged(dentry, list, &root); + revert_creds(old_cred); if (err) return err; From f81678173ce25a1c7e1570a328dfba50b5d872eb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 18 Dec 2017 14:25:56 +0200 Subject: [PATCH 04/51] ovl: fix another overlay: warning prefix Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 926248e1de04..69a43ede0a2a 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -693,7 +693,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, */ err = -EPERM; if (d.redirect && !ofs->config.redirect_follow) { - pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry); + pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n", + dentry); goto out_put; } From 2ba9d57e65044859f7ff133bcb0a902769bf3bc6 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 3 Jan 2018 18:54:41 +0200 Subject: [PATCH 05/51] ovl: take mnt_want_write() for work/index dir setup There are several write operations on upper fs not covered by mnt_want_write(): - test set/remove OPAQUE xattr - test create O_TMPFILE - set ORIGIN xattr in ovl_verify_origin() - cleanup of index entries in ovl_indexdir_cleanup() Some of these go way back, but this patch only applies over the v4.14 re-factoring of ovl_fill_super(). Cc: #v4.14 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 1a436fa92a04..3387e6d639a5 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -521,10 +521,6 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, bool retried = false; bool locked = false; - err = mnt_want_write(mnt); - if (err) - goto out_err; - inode_lock_nested(dir, I_MUTEX_PARENT); locked = true; @@ -589,7 +585,6 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, goto out_err; } out_unlock: - mnt_drop_write(mnt); if (locked) inode_unlock(dir); @@ -930,12 +925,17 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath) static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) { + struct vfsmount *mnt = ofs->upper_mnt; struct dentry *temp; int err; + err = mnt_want_write(mnt); + if (err) + return err; + ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); if (!ofs->workdir) - return 0; + goto out; /* * Upper should support d_type, else whiteouts are visible. Given @@ -945,7 +945,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) */ err = ovl_check_d_type_supported(workpath); if (err < 0) - return err; + goto out; /* * We allowed this configuration and don't want to break users over @@ -969,6 +969,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) if (err) { ofs->noxattr = true; pr_warn("overlayfs: upper fs does not support xattr.\n"); + err = 0; } else { vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); } @@ -980,7 +981,9 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); } - return 0; +out: + mnt_drop_write(mnt); + return err; } static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath) @@ -1027,8 +1030,13 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath) static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, struct path *upperpath) { + struct vfsmount *mnt = ofs->upper_mnt; int err; + err = mnt_want_write(mnt); + if (err) + return err; + /* Verify lower root is upper root origin */ err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, false, true); @@ -1056,6 +1064,7 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); out: + mnt_drop_write(mnt); return err; } From a5a927a7c82e28ea76599dee4019c41e372c911f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 3 Jan 2018 18:54:42 +0200 Subject: [PATCH 06/51] ovl: take mnt_want_write() for removing impure xattr The optimization in ovl_cache_get_impure() that tries to remove an unneeded "impure" xattr needs to take mnt_want_write() on upper fs. Fixes: 4edb83bb1041 ("ovl: constant d_ino for non-merge dirs") Cc: #v4.14 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 7dfe381c2cd8..1a8c39887992 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -593,8 +593,15 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path) return ERR_PTR(res); } if (list_empty(&cache->entries)) { - /* Good oportunity to get rid of an unnecessary "impure" flag */ - ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE); + /* + * A good opportunity to get rid of an unneeded "impure" flag. + * Removing the "impure" xattr is best effort. + */ + if (!ovl_want_write(dentry)) { + ovl_do_removexattr(ovl_dentry_upper(dentry), + OVL_XATTR_IMPURE); + ovl_drop_write(dentry); + } ovl_clear_flag(OVL_IMPURE, d_inode(dentry)); kfree(cache); return NULL; From 9678e630305724487f1fc101d6b83c383ff9cc90 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 3 Jan 2018 19:34:45 +0200 Subject: [PATCH 07/51] ovl: fix inconsistent d_ino for legacy merge dir For a merge dir that was copied up before v4.12 or that was hand crafted offline (e.g. mkdir {upper/lower}/dir), upper dir does not contain the 'trusted.overlay.origin' xattr. In that case, stat(2) on the merge dir returns the lower dir st_ino, but getdents(2) returns the upper dir d_ino. After this change, on merge dir lookup, missing origin xattr on upper dir will be fixed and 'impure' xattr will be fixed on parent of the legacy merge dir. Suggested-by: zhangyi (F) Reviewed-by: zhangyi (F) Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 4 ++-- fs/overlayfs/namei.c | 33 +++++++++++++++++++++++++++++++++ fs/overlayfs/overlayfs.h | 2 ++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index eb3b8d39fb61..206ececd5ae7 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -288,8 +288,8 @@ struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) return fh; } -static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, - struct dentry *upper) +int ovl_set_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper) { const struct ovl_fh *fh = NULL; int err; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 69a43ede0a2a..69f4f19659fc 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -584,6 +584,27 @@ static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path) return i; } +/* Fix missing 'origin' xattr */ +static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper) +{ + int err; + + if (ovl_check_origin_xattr(upper)) + return 0; + + err = ovl_want_write(dentry); + if (err) + return err; + + err = ovl_set_origin(dentry, lower, upper); + if (!err) + err = ovl_set_impure(dentry->d_parent, upper->d_parent); + + ovl_drop_write(dentry); + return err; +} + struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -674,6 +695,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!this) continue; + /* + * If no origin fh is stored in upper of a merge dir, store fh + * of lower dir and set upper parent "impure". + */ + if (upperdentry && !ctr && !ofs->noxattr) { + err = ovl_fix_origin(dentry, this, upperdentry); + if (err) { + dput(this); + goto out_put; + } + } + stack[ctr].dentry = this; stack[ctr].layer = lower.layer; ctr++; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index b489099ccd49..d1cfa69c98b5 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -322,3 +322,5 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper); +int ovl_set_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper); From a683737ba924cd2985f6e7350520f449915ff8f9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 19 Sep 2017 12:14:18 +0300 Subject: [PATCH 08/51] ovl: disable index when no xattr support Overlayfs falls back to index=off if lower/upper fs does not support file handles. Do the same if upper fs does not support xattr. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 3387e6d639a5..f3281f0b2388 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -968,7 +968,8 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); if (err) { ofs->noxattr = true; - pr_warn("overlayfs: upper fs does not support xattr.\n"); + ofs->config.index = false; + pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n"); err = 0; } else { vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); From 972d0093c2f7b1bd57e47a1780a552dde528fd16 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 19 Sep 2017 12:14:18 +0300 Subject: [PATCH 09/51] ovl: force r/o mount when index dir creation fails When work dir creation fails, a warning is emitted and overlay is mounted r/o. Trying to remount r/w will fail with no work dir. When index dir creation fails, the same warning is emitted and overlay is mounted r/o, but trying to remount r/w will succeed. This may cause unintentional corruption of filesystem consistency. Adjust the behavior of index dir creation failure to that of work dir creation failure and do not allow to remount r/w. User needs to state an explicitly intention to work without an index by mounting with option 'index=off' to allow r/w mount with no index dir. When mounting with option 'index=on' and no 'upperdir', index is implicitly disabled, so do not warn about no file handle support. The issue was introduced with inodes index feature in v4.13, but this patch will not apply cleanly before ovl_fill_super() re-factoring in v4.15. Fixes: 02bcd1577400 ("ovl: introduce the inodes index dir feature") Cc: #v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index f3281f0b2388..9aa5d32af427 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -699,7 +699,8 @@ static int ovl_lower_dir(const char *name, struct path *path, * The inodes index feature needs to encode and decode file * handles, so it requires that all layers support them. */ - if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { + if (ofs->config.index && ofs->config.upperdir && + !ovl_can_decode_fh(path->dentry->d_sb)) { ofs->config.index = false; pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); } @@ -1268,11 +1269,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_free_oe; - if (!ofs->indexdir) + /* Force r/o mount with no index dir */ + if (!ofs->indexdir) { + dput(ofs->workdir); + ofs->workdir = NULL; sb->s_flags |= SB_RDONLY; + } + } - /* Show index=off/on in /proc/mounts for any of the reasons above */ + /* Show index=off in /proc/mounts for forced r/o mount */ if (!ofs->indexdir) ofs->config.index = false; From d583ed7d138825fd9469d5419e23230ad39173e8 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 8 Nov 2017 19:23:36 +0200 Subject: [PATCH 10/51] ovl: store layer index in ovl_layer Store the fs root layer index inside ovl_layer struct, so we can get the root fs layer index from merge dir lower layer instead of find it with ovl_find_layer() helper. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 17 +---------------- fs/overlayfs/ovl_entry.h | 2 ++ fs/overlayfs/super.c | 1 + 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 69f4f19659fc..a38db76cbccd 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -572,18 +572,6 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) return (idx < oe->numlower) ? idx + 1 : -1; } -static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path) -{ - int i; - - for (i = 0; i < ofs->numlower; i++) { - if (ofs->lower_layers[i].mnt == path->layer->mnt) - break; - } - - return i; -} - /* Fix missing 'origin' xattr */ static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower, struct dentry *upper) @@ -733,11 +721,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (d.redirect && d.redirect[0] == '/' && poe != roe) { poe = roe; - /* Find the current layer on the root dentry */ - i = ovl_find_layer(ofs, &lower); - if (WARN_ON(i == ofs->numlower)) - break; + i = lower.layer->idx - 1; } } diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 9d0bc03bf6e4..608e48755070 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -22,6 +22,8 @@ struct ovl_config { struct ovl_layer { struct vfsmount *mnt; dev_t pseudo_dev; + /* Index of this layer in fs root (upper == 0) */ + int idx; }; struct ovl_path { diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 9aa5d32af427..b34a002ab4b5 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1106,6 +1106,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, ofs->lower_layers[ofs->numlower].mnt = mnt; ofs->lower_layers[ofs->numlower].pseudo_dev = dev; + ofs->lower_layers[ofs->numlower].idx = i + 1; ofs->numlower++; /* Check if all lower layers are on same sb */ From 2e1a532883cf77f01031bef4b83d864a46c1bed0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 24 Oct 2017 15:12:15 +0300 Subject: [PATCH 11/51] ovl: factor out ovl_check_origin_fh() Re-factor ovl_check_origin() and ovl_get_origin(), so origin fh xattr is read from upper inode only once during lookup with multiple lower layers and only once when verifying index entry origin. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 142 ++++++++++++++++++++++++++++--------------- 1 file changed, 92 insertions(+), 50 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index a38db76cbccd..a6b9bd2afca1 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -87,9 +87,36 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry) return 1; } +/* + * Check validity of an overlay file handle buffer. + * + * Return 0 for a valid file handle. + * Return -ENODATA for "origin unknown". + * Return <0 for an invalid file handle. + */ +static int ovl_check_fh_len(struct ovl_fh *fh, int fh_len) +{ + if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len) + return -EINVAL; + + if (fh->magic != OVL_FH_MAGIC) + return -EINVAL; + + /* Treat larger version and unknown flags as "origin unknown" */ + if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) + return -ENODATA; + + /* Treat endianness mismatch as "origin unknown" */ + if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && + (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) + return -ENODATA; + + return 0; +} + static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) { - int res; + int res, err; struct ovl_fh *fh = NULL; res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); @@ -102,7 +129,7 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) if (res == 0) return NULL; - fh = kzalloc(res, GFP_KERNEL); + fh = kzalloc(res, GFP_KERNEL); if (!fh) return ERR_PTR(-ENOMEM); @@ -110,20 +137,12 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) if (res < 0) goto fail; - if (res < sizeof(struct ovl_fh) || res < fh->len) + err = ovl_check_fh_len(fh, res); + if (err < 0) { + if (err == -ENODATA) + goto out; goto invalid; - - if (fh->magic != OVL_FH_MAGIC) - goto invalid; - - /* Treat larger version and unknown flags as "origin unknown" */ - if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) - goto out; - - /* Treat endianness mismatch as "origin unknown" */ - if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && - (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) - goto out; + } return fh; @@ -139,22 +158,17 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) goto out; } -static struct dentry *ovl_get_origin(struct dentry *dentry, - struct vfsmount *mnt) +static struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt) { - struct dentry *origin = NULL; - struct ovl_fh *fh = ovl_get_origin_fh(dentry); + struct dentry *origin; int bytes; - if (IS_ERR_OR_NULL(fh)) - return (struct dentry *)fh; - /* * Make sure that the stored uuid matches the uuid of the lower * layer where file handle will be decoded. */ if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) - goto out; + return NULL; bytes = (fh->len - offsetof(struct ovl_fh, fid)); origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, @@ -164,22 +178,15 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, /* Treat stale file handle as "origin unknown" */ if (origin == ERR_PTR(-ESTALE)) origin = NULL; - goto out; + return origin; } - if (ovl_dentry_weird(origin) || - ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) - goto invalid; + if (ovl_dentry_weird(origin)) { + dput(origin); + return NULL; + } -out: - kfree(fh); return origin; - -invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin); - dput(origin); - origin = NULL; - goto out; } static bool ovl_is_opaquedir(struct dentry *dentry) @@ -284,9 +291,9 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, } -static int ovl_check_origin(struct dentry *upperdentry, - struct ovl_path *lower, unsigned int numlower, - struct ovl_path **stackp, unsigned int *ctrp) +static int ovl_check_origin_fh(struct ovl_fh *fh, struct dentry *upperdentry, + struct ovl_path *lower, unsigned int numlower, + struct ovl_path **stackp) { struct vfsmount *mnt; struct dentry *origin = NULL; @@ -294,18 +301,20 @@ static int ovl_check_origin(struct dentry *upperdentry, for (i = 0; i < numlower; i++) { mnt = lower[i].layer->mnt; - origin = ovl_get_origin(upperdentry, mnt); - if (IS_ERR(origin)) - return PTR_ERR(origin); - + origin = ovl_decode_fh(fh, mnt); if (origin) break; } if (!origin) - return 0; + return -ESTALE; + else if (IS_ERR(origin)) + return PTR_ERR(origin); + + if (!ovl_is_whiteout(upperdentry) && + ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT)) + goto invalid; - BUG_ON(*ctrp); if (!*stackp) *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL); if (!*stackp) { @@ -313,9 +322,41 @@ static int ovl_check_origin(struct dentry *upperdentry, return -ENOMEM; } **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer}; - *ctrp = 1; return 0; + +invalid: + pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n", + upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, + d_inode(origin)->i_mode & S_IFMT); + dput(origin); + return -EIO; +} + +static int ovl_check_origin(struct dentry *upperdentry, + struct ovl_path *lower, unsigned int numlower, + struct ovl_path **stackp, unsigned int *ctrp) +{ + struct ovl_fh *fh = ovl_get_origin_fh(upperdentry); + int err; + + if (IS_ERR_OR_NULL(fh)) + return PTR_ERR(fh); + + err = ovl_check_origin_fh(fh, upperdentry, lower, numlower, stackp); + kfree(fh); + + if (err) { + if (err == -ESTALE) + return 0; + return err; + } + + if (WARN_ON(*ctrp)) + return -EIO; + + *ctrp = 1; + return 0; } /* @@ -389,7 +430,6 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower, size_t len; struct ovl_path origin = { }; struct ovl_path *stack = &origin; - unsigned int ctr = 0; int err; if (!d_inode(index)) @@ -420,16 +460,18 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower, goto fail; err = -EINVAL; - if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len) + if (hex2bin((u8 *)fh, index->d_name.name, len)) + goto fail; + + err = ovl_check_fh_len(fh, len); + if (err) goto fail; err = ovl_verify_origin_fh(index, fh); if (err) goto fail; - err = ovl_check_origin(index, lower, numlower, &stack, &ctr); - if (!err && !ctr) - err = -ESTALE; + err = ovl_check_origin_fh(fh, index, lower, numlower, &stack); if (err) goto fail; From 1eff1a1deec727bacead79ec64554c1df190f43c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 12 Dec 2017 22:40:46 +0200 Subject: [PATCH 12/51] ovl: simplify arguments to ovl_check_origin_fh() Pass the fs instance with lower_layers array instead of the dentry lowerstack array to ovl_check_origin_fh(), because the dentry members of lowerstack play no role in this helper. This change simplifies the argument list of ovl_check_origin(), ovl_cleanup_index() and ovl_verify_index(). Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 28 +++++++++++++--------------- fs/overlayfs/overlayfs.h | 9 ++++----- fs/overlayfs/readdir.c | 12 ++++++------ fs/overlayfs/super.c | 5 +---- 4 files changed, 24 insertions(+), 30 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index a6b9bd2afca1..27f25a61f6e4 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -291,17 +291,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, } -static int ovl_check_origin_fh(struct ovl_fh *fh, struct dentry *upperdentry, - struct ovl_path *lower, unsigned int numlower, +static int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct dentry *upperdentry, struct ovl_path **stackp) { - struct vfsmount *mnt; struct dentry *origin = NULL; int i; - for (i = 0; i < numlower; i++) { - mnt = lower[i].layer->mnt; - origin = ovl_decode_fh(fh, mnt); + for (i = 0; i < ofs->numlower; i++) { + origin = ovl_decode_fh(fh, ofs->lower_layers[i].mnt); if (origin) break; } @@ -321,7 +319,10 @@ static int ovl_check_origin_fh(struct ovl_fh *fh, struct dentry *upperdentry, dput(origin); return -ENOMEM; } - **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer}; + **stackp = (struct ovl_path){ + .dentry = origin, + .layer = &ofs->lower_layers[i] + }; return 0; @@ -333,8 +334,7 @@ static int ovl_check_origin_fh(struct ovl_fh *fh, struct dentry *upperdentry, return -EIO; } -static int ovl_check_origin(struct dentry *upperdentry, - struct ovl_path *lower, unsigned int numlower, +static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, struct ovl_path **stackp, unsigned int *ctrp) { struct ovl_fh *fh = ovl_get_origin_fh(upperdentry); @@ -343,7 +343,7 @@ static int ovl_check_origin(struct dentry *upperdentry, if (IS_ERR_OR_NULL(fh)) return PTR_ERR(fh); - err = ovl_check_origin_fh(fh, upperdentry, lower, numlower, stackp); + err = ovl_check_origin_fh(ofs, fh, upperdentry, stackp); kfree(fh); if (err) { @@ -423,8 +423,7 @@ int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. */ -int ovl_verify_index(struct dentry *index, struct ovl_path *lower, - unsigned int numlower) +int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) { struct ovl_fh *fh = NULL; size_t len; @@ -471,7 +470,7 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower, if (err) goto fail; - err = ovl_check_origin_fh(fh, index, lower, numlower, &stack); + err = ovl_check_origin_fh(ofs, fh, index, &stack); if (err) goto fail; @@ -689,8 +688,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * number - it's the same as if we held a reference * to a dentry in lower layer that was moved under us. */ - err = ovl_check_origin(upperdentry, roe->lowerstack, - roe->numlower, &stack, &ctr); + err = ovl_check_origin(ofs, upperdentry, &stack, &ctr); if (err) goto out_put_upper; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d1cfa69c98b5..d55afb6646b0 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -251,11 +251,11 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) /* namei.c */ int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, bool is_upper, bool set); -int ovl_verify_index(struct dentry *index, struct ovl_path *lower, - unsigned int numlower); +int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); int ovl_get_index_name(struct dentry *origin, struct qstr *name); int ovl_path_next(int idx, struct dentry *dentry, struct path *path); -struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); /* readdir.c */ @@ -267,8 +267,7 @@ void ovl_dir_cache_free(struct inode *inode); int ovl_check_d_type_supported(struct path *realpath); void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, struct dentry *dentry, int level); -int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, - struct ovl_path *lower, unsigned int numlower); +int ovl_indexdir_cleanup(struct ovl_fs *ofs); /* inode.c */ int ovl_set_nlink_upper(struct dentry *dentry); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 1a8c39887992..4c660c7085b7 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -1030,13 +1030,13 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, } } -int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, - struct ovl_path *lower, unsigned int numlower) +int ovl_indexdir_cleanup(struct ovl_fs *ofs) { int err; + struct dentry *indexdir = ofs->indexdir; struct dentry *index = NULL; - struct inode *dir = dentry->d_inode; - struct path path = { .mnt = mnt, .dentry = dentry }; + struct inode *dir = indexdir->d_inode; + struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir }; LIST_HEAD(list); struct rb_root root = RB_ROOT; struct ovl_cache_entry *p; @@ -1060,13 +1060,13 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, if (p->len == 2 && p->name[1] == '.') continue; } - index = lookup_one_len(p->name, dentry, p->len); + index = lookup_one_len(p->name, indexdir, p->len); if (IS_ERR(index)) { err = PTR_ERR(index); index = NULL; break; } - err = ovl_verify_index(index, lower, numlower); + err = ovl_verify_index(ofs, index); /* Cleanup stale and orphan index entries */ if (err && (err == -ESTALE || err == -ENOENT)) err = ovl_cleanup(dir, index); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b34a002ab4b5..4345c3f83fde 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1057,10 +1057,7 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, /* Cleanup bad/stale/orphan index entries */ if (!err) - err = ovl_indexdir_cleanup(ofs->indexdir, - ofs->upper_mnt, - oe->lowerstack, - oe->numlower); + err = ovl_indexdir_cleanup(ofs); } if (err || !ofs->indexdir) pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); From 051224438af21047b34160b1e0ad1c5af45fdace Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 11 Jan 2018 08:25:32 +0200 Subject: [PATCH 13/51] ovl: generalize ovl_verify_origin() and helpers Remove the "origin" language from the functions that handle set, get and verify of "origin" xattr and pass the xattr name as an argument. The same helpers are going to be used for NFS export to get, get and verify the "upper" xattr for directory index entries. ovl_verify_origin() is now a helper used only to verify non upper file handle stored in "origin" xattr of upper inode. The upper root dir file handle is still stored in "origin" xattr on the index dir for backward compatibility. This is going to be changed by the patch that adds directory index entries support. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 6 +++--- fs/overlayfs/namei.c | 42 +++++++++++++++++++++------------------- fs/overlayfs/overlayfs.h | 12 +++++++++--- fs/overlayfs/super.c | 8 ++++---- 4 files changed, 38 insertions(+), 30 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 206ececd5ae7..503c92404095 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -232,13 +232,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } -struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) +struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper) { struct ovl_fh *fh; int fh_type, fh_len, dwords; void *buf; int buflen = MAX_HANDLE_SZ; - uuid_t *uuid = &lower->d_sb->s_uuid; + uuid_t *uuid = &real->d_sb->s_uuid; buf = kmalloc(buflen, GFP_KERNEL); if (!buf) @@ -250,7 +250,7 @@ struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) * the price or reconnecting the dentry. */ dwords = buflen >> 2; - fh_type = exportfs_encode_fh(lower, buf, &dwords, 0); + fh_type = exportfs_encode_fh(real, buf, &dwords, 0); buflen = (dwords << 2); fh = ERR_PTR(-EIO); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 27f25a61f6e4..11e164cb2593 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -114,12 +114,12 @@ static int ovl_check_fh_len(struct ovl_fh *fh, int fh_len) return 0; } -static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) +static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name) { int res, err; struct ovl_fh *fh = NULL; - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + res = vfs_getxattr(dentry, name, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return NULL; @@ -133,7 +133,7 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) if (!fh) return ERR_PTR(-ENOMEM); - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); + res = vfs_getxattr(dentry, name, fh, res); if (res < 0) goto fail; @@ -337,7 +337,7 @@ static int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, struct ovl_path **stackp, unsigned int *ctrp) { - struct ovl_fh *fh = ovl_get_origin_fh(upperdentry); + struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN); int err; if (IS_ERR_OR_NULL(fh)) @@ -360,12 +360,13 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, } /* - * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN. + * Verify that @fh matches the file handle stored in xattr @name. * Return 0 on match, -ESTALE on mismatch, < 0 on error. */ -static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) +static int ovl_verify_fh(struct dentry *dentry, const char *name, + const struct ovl_fh *fh) { - struct ovl_fh *ofh = ovl_get_origin_fh(dentry); + struct ovl_fh *ofh = ovl_get_fh(dentry, name); int err = 0; if (!ofh) @@ -382,28 +383,28 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) } /* - * Verify that an inode matches the origin file handle stored in upper inode. + * Verify that @real dentry matches the file handle stored in xattr @name. * - * If @set is true and there is no stored file handle, encode and store origin - * file handle in OVL_XATTR_ORIGIN. + * If @set is true and there is no stored file handle, encode @real and store + * file handle in xattr @name. * - * Return 0 on match, -ESTALE on mismatch, < 0 on error. + * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error. */ -int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, - bool is_upper, bool set) +int ovl_verify_set_fh(struct dentry *dentry, const char *name, + struct dentry *real, bool is_upper, bool set) { struct inode *inode; struct ovl_fh *fh; int err; - fh = ovl_encode_fh(origin, is_upper); + fh = ovl_encode_fh(real, is_upper); err = PTR_ERR(fh); if (IS_ERR(fh)) goto fail; - err = ovl_verify_origin_fh(dentry, fh); + err = ovl_verify_fh(dentry, name, fh); if (set && err == -ENODATA) - err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0); + err = ovl_do_setxattr(dentry, name, fh, fh->len, 0); if (err) goto fail; @@ -412,9 +413,10 @@ int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, return err; fail: - inode = d_inode(origin); - pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n", - origin, inode ? inode->i_ino : 0, err); + inode = d_inode(real); + pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n", + is_upper ? "upper" : "origin", real, + inode ? inode->i_ino : 0, err); goto out; } @@ -466,7 +468,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) if (err) goto fail; - err = ovl_verify_origin_fh(index, fh); + err = ovl_verify_fh(index, OVL_XATTR_ORIGIN, fh); if (err) goto fail; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d55afb6646b0..1d62b1e6111a 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -249,8 +249,8 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) /* namei.c */ -int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, - bool is_upper, bool set); +int ovl_verify_set_fh(struct dentry *dentry, const char *name, + struct dentry *real, bool is_upper, bool set); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); int ovl_get_index_name(struct dentry *origin, struct qstr *name); int ovl_path_next(int idx, struct dentry *dentry, struct path *path); @@ -258,6 +258,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); +static inline int ovl_verify_origin(struct dentry *upper, + struct dentry *origin, bool set) +{ + return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set); +} + /* readdir.c */ extern const struct file_operations ovl_dir_operations; int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); @@ -320,6 +326,6 @@ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); -struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper); +struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper); int ovl_set_origin(struct dentry *dentry, struct dentry *lower, struct dentry *upper); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 4345c3f83fde..4ebbb368fce8 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1041,7 +1041,7 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, /* Verify lower root is upper root origin */ err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, - false, true); + true); if (err) { pr_err("overlayfs: failed to verify upper root origin\n"); goto out; @@ -1049,9 +1049,9 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); if (ofs->indexdir) { - /* Verify upper root is index dir origin */ - err = ovl_verify_origin(ofs->indexdir, upperpath->dentry, - true, true); + /* Verify upper root is exclusively associated with index dir */ + err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, + upperpath->dentry, true, true); if (err) pr_err("overlayfs: failed to verify index dir origin\n"); From 60b866420ba7ae0b6a8d338f49be21c601d19064 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 10 Jan 2018 23:15:21 +0200 Subject: [PATCH 14/51] ovl: update documentation of inodes index feature Document that inode index feature solves breaking hard links on copy up. Simplify Kconfig backward compatibility disclaimer. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.txt | 6 +++--- fs/overlayfs/Kconfig | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index e6a5f4912b6d..213547cb6d36 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -281,9 +281,9 @@ filesystem, so both st_dev and st_ino of the file may change. Any open files referring to this inode will access the old data. -If a file with multiple hard links is copied up, then this will -"break" the link. Changes will not be propagated to other names -referring to the same inode. +Unless "inode index" feature is enabled, if a file with multiple hard +links is copied up, then this will "break" the link. Changes will not be +propagated to other names referring to the same inode. Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged directory will fail with EXDEV. diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 5ac415466861..9eac01c3e21e 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -47,9 +47,6 @@ config OVERLAY_FS_INDEX The inodes index feature prevents breaking of lower hardlinks on copy up. - Note, that the inodes index feature is read-only backward compatible. - That is, mounting an overlay which has an index dir on a kernel that - doesn't support this feature read-only, will not have any negative - outcomes. However, mounting the same overlay with an old kernel - read-write and then mounting it again with a new kernel, will have - unexpected results. + Note, that the inodes index feature is not backward compatible. + That is, mounting an overlay which has an inodes index on a kernel + that doesn't support this feature will have unexpected results. From f168f1098dd9038daaf9f7be5f81cdea4985886a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 19 Jan 2018 11:26:53 +0200 Subject: [PATCH 15/51] ovl: add support for "nfs_export" configuration Introduce the "nfs_export" config, module and mount options. The NFS export feature depends on the "index" feature and enables two implicit overlayfs features: "index_all" and "verify_lower". The "index_all" feature creates an index on copy up of every file and directory. The "verify_lower" feature uses the full index to detect overlay filesystems inconsistencies on lookup, like redirect from multiple upper dirs to the same lower dir. NFS export can be enabled for non-upper mount with no index. However, because lower layer redirects cannot be verified with the index, enabling NFS export support on an overlay with no upper layer requires turning off redirect follow (e.g. "redirect_dir=nofollow"). The full index may incur some overhead on mount time, especially when verifying that lower directory file handles are not stale. NFS export support, full index and consistency verification will be implemented by following patches. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.txt | 35 ++++++++++++++++++ fs/overlayfs/Kconfig | 22 +++++++++++ fs/overlayfs/overlayfs.h | 2 + fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/super.c | 49 ++++++++++++++++++++++--- fs/overlayfs/util.c | 16 ++++++++ 6 files changed, 120 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 213547cb6d36..7a05329844d2 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -190,6 +190,24 @@ Mount options: Redirects are not created and not followed (equivalent to "redirect_dir=off" if "redirect_always_follow" feature is not enabled). +When the NFS export feature is enabled, every copied up directory is +indexed by the file handle of the lower inode and a file handle of the +upper directory is stored in a "trusted.overlay.upper" extended attribute +on the index entry. On lookup of a merged directory, if the upper +directory does not match the file handle stores in the index, that is an +indication that multiple upper directories may be redirected to the same +lower directory. In that case, lookup returns an error and warns about +a possible inconsistency. + +Because lower layer redirects cannot be verified with the index, enabling +NFS export support on an overlay filesystem with no upper layer requires +turning off redirect follow (e.g. "redirect_dir=nofollow"). + +When the NFS export feature is enabled, all directory index entries are +verified on mount time to check that upper file handles are not stale. +This verification may cause significant overhead in some cases. + + Non-directories --------------- @@ -299,6 +317,23 @@ filesystem are not allowed. If the underlying filesystem is changed, the behavior of the overlay is undefined, though it will not result in a crash or deadlock. +When the overlay NFS export feature is enabled, overlay filesystems +behavior on offline changes of the underlying lower layer is different +than the behavior when NFS export is disabled. + +On every copy_up, an NFS file handle of the lower inode, along with the +UUID of the lower filesystem, are encoded and stored in an extended +attribute "trusted.overlay.origin" on the upper inode. + +When the NFS export feature is enabled, a lookup of a merged directory, +that found a lower directory at the lookup path or at the path pointed +to by the "trusted.overlay.redirect" extended attribute, will verify +that the found lower directory file handle and lower filesystem UUID +match the origin file handle that was stored at copy_up time. If a +found lower directory does not match the stored origin, that directory +will not be merged with the upper directory. + + Testsuite --------- diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 9eac01c3e21e..406e72de88f6 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -50,3 +50,25 @@ config OVERLAY_FS_INDEX Note, that the inodes index feature is not backward compatible. That is, mounting an overlay which has an inodes index on a kernel that doesn't support this feature will have unexpected results. + +config OVERLAY_FS_NFS_EXPORT + bool "Overlayfs: turn on NFS export feature by default" + depends on OVERLAY_FS + depends on OVERLAY_FS_INDEX + help + If this config option is enabled then overlay filesystems will use + the inodes index dir to decode overlay NFS file handles by default. + In this case, it is still possible to turn off NFS export support + globally with the "nfs_export=off" module option or on a filesystem + instance basis with the "nfs_export=off" mount option. + + The NFS export feature creates an index on copy up of every file and + directory. This full index is used to detect overlay filesystems + inconsistencies on lookup, like redirect from multiple upper dirs to + the same lower dir. The full index may incur some overhead on mount + time, especially when verifying that directory file handles are not + stale. + + Note, that the NFS export feature is not backward compatible. + That is, mounting an overlay which has a full index on a kernel + that doesn't support this feature will have unexpected results. diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 1d62b1e6111a..db75955f9677 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -194,6 +194,8 @@ const struct cred *ovl_override_creds(struct super_block *sb); struct super_block *ovl_same_sb(struct super_block *sb); bool ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); +bool ovl_index_all(struct super_block *sb); +bool ovl_verify_lower(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); bool ovl_dentry_weird(struct dentry *dentry); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 608e48755070..6dd60fcf8cb7 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -17,6 +17,7 @@ struct ovl_config { bool redirect_follow; const char *redirect_mode; bool index; + bool nfs_export; }; struct ovl_layer { diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 4ebbb368fce8..1d538be87fa0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -45,6 +45,11 @@ module_param_named(index, ovl_index_def, bool, 0644); MODULE_PARM_DESC(ovl_index_def, "Default to on or off for the inodes index feature"); +static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); +module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); +MODULE_PARM_DESC(ovl_nfs_export_def, + "Default to on or off for the NFS export feature"); + static void ovl_entry_stack_free(struct ovl_entry *oe) { unsigned int i; @@ -342,6 +347,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); if (ofs->config.index != ovl_index_def) seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); + if (ofs->config.nfs_export != ovl_nfs_export_def) + seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? + "on" : "off"); return 0; } @@ -374,6 +382,8 @@ enum { OPT_REDIRECT_DIR, OPT_INDEX_ON, OPT_INDEX_OFF, + OPT_NFS_EXPORT_ON, + OPT_NFS_EXPORT_OFF, OPT_ERR, }; @@ -385,6 +395,8 @@ static const match_table_t ovl_tokens = { {OPT_REDIRECT_DIR, "redirect_dir=%s"}, {OPT_INDEX_ON, "index=on"}, {OPT_INDEX_OFF, "index=off"}, + {OPT_NFS_EXPORT_ON, "nfs_export=on"}, + {OPT_NFS_EXPORT_OFF, "nfs_export=off"}, {OPT_ERR, NULL} }; @@ -491,6 +503,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->index = false; break; + case OPT_NFS_EXPORT_ON: + config->nfs_export = true; + break; + + case OPT_NFS_EXPORT_OFF: + config->nfs_export = false; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; @@ -696,13 +716,16 @@ static int ovl_lower_dir(const char *name, struct path *path, *remote = true; /* - * The inodes index feature needs to encode and decode file - * handles, so it requires that all layers support them. + * The inodes index feature and NFS export need to encode and decode + * file handles, so they require that all layers support them. */ - if (ofs->config.index && ofs->config.upperdir && + if ((ofs->config.nfs_export || + (ofs->config.index && ofs->config.upperdir)) && !ovl_can_decode_fh(path->dentry->d_sb)) { ofs->config.index = false; - pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); + ofs->config.nfs_export = false; + pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", + name); } return 0; @@ -983,6 +1006,12 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); } + /* NFS export of r/w mount depends on index */ + if (ofs->config.nfs_export && !ofs->config.index) { + pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; + } + out: mnt_drop_write(mnt); return err; @@ -1141,6 +1170,10 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, } else if (!ofs->config.upperdir && stacklen == 1) { pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); goto out_err; + } else if (!ofs->config.upperdir && ofs->config.nfs_export && + ofs->config.redirect_follow) { + pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; } err = -ENOMEM; @@ -1217,6 +1250,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_err; ofs->config.index = ovl_index_def; + ofs->config.nfs_export = ovl_nfs_export_def; err = ovl_parse_opt((char *) data, &ofs->config); if (err) goto out_err; @@ -1277,8 +1311,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } /* Show index=off in /proc/mounts for forced r/o mount */ - if (!ofs->indexdir) + if (!ofs->indexdir) { ofs->config.index = false; + if (ofs->upper_mnt && ofs->config.nfs_export) { + pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; + } + } /* Never override disk quota limits or use reserved space */ cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 06119f34a69d..ae81d878248e 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -63,6 +63,22 @@ struct dentry *ovl_indexdir(struct super_block *sb) return ofs->indexdir; } +/* Index all files on copy up. For now only enabled for NFS export */ +bool ovl_index_all(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->config.nfs_export && ofs->config.index; +} + +/* Verify lower origin on lookup. For now only enabled for NFS export */ +bool ovl_verify_lower(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->config.nfs_export && ofs->config.index; +} + struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); From 37b12916c0f802d956c767db984801d3100c6524 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 10 Jan 2018 22:29:38 +0200 Subject: [PATCH 16/51] ovl: verify stored origin fh matches lower dir When the NFS export feature is enabled, overlayfs implicitly enables the feature "verify_lower". When the "verify_lower" feature is enabled, a directory inode found in lower layer by name or by redirect_dir is verified against the file handle of the copy up origin that is stored in the upper layer. This introduces a change of behavior for the case of lower layer modification while overlay is offline. A lower directory created or moved offline under an exisitng upper directory, will not be merged with that upper directory. The NFS export feature should not be used after copying layers, because the new lower directory inodes would fail verification and won't be merged with upper directories. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 11e164cb2593..69ca8eb07519 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -737,6 +737,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } } + /* + * When "verify_lower" feature is enabled, do not merge with a + * lower dir that does not match a stored origin xattr. + */ + if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) { + err = ovl_verify_origin(upperdentry, this, false); + if (err) { + dput(this); + break; + } + } + stack[ctr].dentry = this; stack[ctr].layer = lower.layer; ctr++; From 86eaa13046d5e814484c89f635a95b0342b765ad Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 21 Nov 2017 13:55:51 +0200 Subject: [PATCH 17/51] ovl: unbless lower st_ino of unverified origin On a malformed overlay, several redirected dirs can point to the same dir on a lower layer. This presents a similar challenge as broken hardlinks, because different objects in the overlay can return the same st_ino/st_dev pair from stat(2). For broken hardlinks, we do not provide constant st_ino on copy up to avoid this inconsistency. When NFS export feature is enabled, apply the same logic to files and directories with unverified lower origin. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 94d2f8a8b779..96587075db11 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -105,12 +105,20 @@ int ovl_getattr(const struct path *path, struct kstat *stat, * Lower hardlinks may be broken on copy up to different * upper files, so we cannot use the lower origin st_ino * for those different files, even for the same fs case. + * + * Similarly, several redirected dirs can point to the + * same dir on a lower layer. With the "verify_lower" + * feature, we do not use the lower origin st_ino, if + * we haven't verified that this redirect is unique. + * * With inodes index enabled, it is safe to use st_ino - * of an indexed hardlinked origin. The index validates - * that the upper hardlink is not broken. + * of an indexed origin. The index validates that the + * upper hardlink is not broken and that a redirected + * dir is the only redirect to that origin. */ - if (is_dir || lowerstat.nlink == 1 || - ovl_test_flag(OVL_INDEX, d_inode(dentry))) + if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || + (!ovl_verify_lower(dentry->d_sb) && + (is_dir || lowerstat.nlink == 1))) stat->ino = lowerstat.ino; if (samefs) From ad1d615cec1c973aa222c065997a77e7cd5a0d17 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 11 Jan 2018 10:47:03 +0200 Subject: [PATCH 18/51] ovl: use directory index entries for consistency verification A directory index is a directory type entry in index dir with a "trusted.overlay.upper" xattr containing an encoded ovl_fh of the merge directory upper dir inode. On lookup of non-dir files, lower file is followed by origin file handle. On lookup of dir entries, lower dir is found by name and then compared to origin file handle. We only trust dir index if we verified that lower dir matches origin file handle, otherwise index may be inconsistent and we ignore it. If we find an indexed non-upper dir or an indexed merged dir, whose index 'upper' xattr points to a different upper dir, that means that the lower directory may be also referenced by another upper dir via redirect, so we fail the lookup on inconsistency error. To be consistent with directory index entries format, the association of index dir to upper root dir, that was stored by older kernels in "trusted.overlay.origin" xattr is now stored in "trusted.overlay.upper" xattr. This also serves as an indication that overlay was mounted with a kernel that support index directory entries. For backward compatibility, if an 'origin' xattr exists on the index dir we also verify it on mount. Directory index entries are going to be used for NFS export. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 42 +++++++++++++++++++++++++++++++++------- fs/overlayfs/overlayfs.h | 7 +++++++ fs/overlayfs/super.c | 20 +++++++++++++++---- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 69ca8eb07519..b00d909e7326 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -538,6 +538,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, struct dentry *index; struct inode *inode; struct qstr name; + bool is_dir = d_is_dir(origin); int err; err = ovl_get_index_name(origin, &name); @@ -561,8 +562,6 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, inode = d_inode(index); if (d_is_negative(index)) { goto out_dput; - } else if (upper && d_inode(upper) != inode) { - goto out_dput; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { /* @@ -576,8 +575,25 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index, d_inode(index)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); goto fail; - } + } else if (is_dir) { + if (!upper) { + pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n", + origin, index); + goto fail; + } + /* Verify that dir index 'upper' xattr points to upper dir */ + err = ovl_verify_upper(index, upper, false); + if (err) { + if (err == -ESTALE) { + pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n", + upper, origin, index); + } + goto fail; + } + } else if (upper && d_inode(upper) != inode) { + goto out_dput; + } out: kfree(name.name); return index; @@ -646,6 +662,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; struct ovl_path *stack = NULL; struct dentry *upperdir, *upperdentry = NULL; + struct dentry *origin = NULL; struct dentry *index = NULL; unsigned int ctr = 0; struct inode *inode = NULL; @@ -739,7 +756,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, /* * When "verify_lower" feature is enabled, do not merge with a - * lower dir that does not match a stored origin xattr. + * lower dir that does not match a stored origin xattr. In any + * case, only verified origin is used for index lookup. */ if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) { err = ovl_verify_origin(upperdentry, this, false); @@ -747,6 +765,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, dput(this); break; } + + /* Bless lower dir as verified origin */ + origin = this; } stack[ctr].dentry = this; @@ -780,10 +801,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } } - /* Lookup index by lower inode and verify it matches upper inode */ - if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) { - struct dentry *origin = stack[0].dentry; + /* + * Lookup index by lower inode and verify it matches upper inode. + * We only trust dir index if we verified that lower dir matches + * origin, otherwise dir index entries may be inconsistent and we + * ignore them. Always lookup index of non-dir and non-upper. + */ + if (ctr && (!upperdentry || !d.is_dir)) + origin = stack[0].dentry; + if (origin && ovl_indexdir(dentry->d_sb) && + (!d.is_dir || ovl_index_all(dentry->d_sb))) { index = ovl_lookup_index(dentry, upperdentry, origin); if (IS_ERR(index)) { err = PTR_ERR(index); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index db75955f9677..25794a3a3fe1 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -27,6 +27,7 @@ enum ovl_path_type { #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" #define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" +#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper" enum ovl_flag { /* Pure upper dir that may contain non pure upper entries */ @@ -266,6 +267,12 @@ static inline int ovl_verify_origin(struct dentry *upper, return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set); } +static inline int ovl_verify_upper(struct dentry *index, + struct dentry *upper, bool set) +{ + return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set); +} + /* readdir.c */ extern const struct file_operations ovl_dir_operations; int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 1d538be87fa0..170c184a9f43 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1078,11 +1078,23 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); if (ofs->indexdir) { - /* Verify upper root is exclusively associated with index dir */ - err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, - upperpath->dentry, true, true); + /* + * Verify upper root is exclusively associated with index dir. + * Older kernels stored upper fh in "trusted.overlay.origin" + * xattr. If that xattr exists, verify that it is a match to + * upper dir file handle. In any case, verify or set xattr + * "trusted.overlay.upper" to indicate that index may have + * directory entries. + */ + if (ovl_check_origin_xattr(ofs->indexdir)) { + err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, + upperpath->dentry, true, false); + if (err) + pr_err("overlayfs: failed to verify index dir 'origin' xattr\n"); + } + err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); if (err) - pr_err("overlayfs: failed to verify index dir origin\n"); + pr_err("overlayfs: failed to verify index dir 'upper' xattr\n"); /* Cleanup bad/stale/orphan index entries */ if (!err) From 7db25d36d9253c58afd3db837dd53e66ae3b1ac9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 11 Jan 2018 11:03:13 +0200 Subject: [PATCH 19/51] ovl: verify whiteout index entries on mount Whiteout index entries are used as an indication that an exported overlay file handle should be treated as stale (i.e. after unlink of the overlay inode). Check on mount that whiteout index entries have a name that looks like a valid file handle and cleanup invalid index entries. For whiteout index entries, do not check that they also have valid origin fh and nlink xattr, because those xattr do not exist for a whiteout index entry. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index b00d909e7326..c6c79753b3b3 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -439,16 +439,13 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) /* * Directory index entries are going to be used for looking up * redirected upper dirs by lower dir fh when decoding an overlay - * file handle of a merge dir. Whiteout index entries are going to be - * used as an indication that an exported overlay file handle should - * be treated as stale (i.e. after unlink of the overlay inode). - * We don't know the verification rules for directory and whiteout - * index entries, because they have not been implemented yet, so return - * EINVAL if those entries are found to abort the mount to avoid - * corrupting an index that was created by a newer kernel. + * file handle of a merge dir. We don't know the verification rules + * for directory index entries, because they have not been implemented + * yet, so return EINVAL if those entries are found to abort the mount + * and to avoid corrupting an index that was created by a newer kernel. */ err = -EINVAL; - if (d_is_dir(index) || ovl_is_whiteout(index)) + if (d_is_dir(index)) goto fail; if (index->d_name.len < sizeof(struct ovl_fh)*2) @@ -468,6 +465,14 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) if (err) goto fail; + /* + * Whiteout index entries are used as an indication that an exported + * overlay file handle should be treated as stale (i.e. after unlink + * of the overlay inode). These entries contain no origin xattr. + */ + if (ovl_is_whiteout(index)) + goto out; + err = ovl_verify_fh(index, OVL_XATTR_ORIGIN, fh); if (err) goto fail; From e8f9e5b780b0406ab81add72f1a05583ae5d40ac Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 11 Jan 2018 11:33:24 +0200 Subject: [PATCH 20/51] ovl: verify directory index entries on mount Directory index entries should have 'upper' xattr pointing to the real upper dir. Verifying that the upper dir file handle is not stale is expensive, so only verify stale directory index entries on mount if NFS export feature is enabled. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/dcache.c | 1 + fs/overlayfs/namei.c | 127 ++++++++++++++++++++++++++++++++----------- 2 files changed, 95 insertions(+), 33 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 5c7df1df81ff..b5d5ea984ac4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3527,6 +3527,7 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) return result; } +EXPORT_SYMBOL(is_subdir); static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) { diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index c6c79753b3b3..881caa385a36 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -84,7 +84,19 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, static int ovl_acceptable(void *ctx, struct dentry *dentry) { - return 1; + /* + * A non-dir origin may be disconnected, which is fine, because + * we only need it for its unique inode number. + */ + if (!d_is_dir(dentry)) + return 1; + + /* Don't decode a deleted empty directory */ + if (d_unhashed(dentry)) + return 0; + + /* Check if directory belongs to the layer we are decoding from */ + return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root); } /* @@ -160,7 +172,7 @@ static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name) static struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt) { - struct dentry *origin; + struct dentry *real; int bytes; /* @@ -171,22 +183,28 @@ static struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt) return NULL; bytes = (fh->len - offsetof(struct ovl_fh, fid)); - origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, - bytes >> 2, (int)fh->type, - ovl_acceptable, NULL); - if (IS_ERR(origin)) { - /* Treat stale file handle as "origin unknown" */ - if (origin == ERR_PTR(-ESTALE)) - origin = NULL; - return origin; + real = exportfs_decode_fh(mnt, (struct fid *)fh->fid, + bytes >> 2, (int)fh->type, + ovl_acceptable, mnt); + if (IS_ERR(real)) { + /* + * Treat stale file handle to lower file as "origin unknown". + * upper file handle could become stale when upper file is + * unlinked and this information is needed to handle stale + * index entries correctly. + */ + if (real == ERR_PTR(-ESTALE) && + !(fh->flags & OVL_FH_FLAG_PATH_UPPER)) + real = NULL; + return real; } - if (ovl_dentry_weird(origin)) { - dput(origin); + if (ovl_dentry_weird(real)) { + dput(real); return NULL; } - return origin; + return real; } static bool ovl_is_opaquedir(struct dentry *dentry) @@ -420,6 +438,35 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name, goto out; } +/* Get upper dentry from index */ +static struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) +{ + struct ovl_fh *fh; + struct dentry *upper; + + if (!d_is_dir(index)) + return dget(index); + + fh = ovl_get_fh(index, OVL_XATTR_UPPER); + if (IS_ERR_OR_NULL(fh)) + return ERR_CAST(fh); + + upper = ovl_decode_fh(fh, ofs->upper_mnt); + kfree(fh); + + if (IS_ERR_OR_NULL(upper)) + return upper ?: ERR_PTR(-ESTALE); + + if (!d_is_dir(upper)) { + pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n", + index, upper); + dput(upper); + return ERR_PTR(-EIO); + } + + return upper; +} + /* * Verify that an index entry name matches the origin file handle stored in * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. @@ -431,23 +478,13 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) size_t len; struct ovl_path origin = { }; struct ovl_path *stack = &origin; + struct dentry *upper = NULL; int err; if (!d_inode(index)) return 0; - /* - * Directory index entries are going to be used for looking up - * redirected upper dirs by lower dir fh when decoding an overlay - * file handle of a merge dir. We don't know the verification rules - * for directory index entries, because they have not been implemented - * yet, so return EINVAL if those entries are found to abort the mount - * and to avoid corrupting an index that was created by a newer kernel. - */ err = -EINVAL; - if (d_is_dir(index)) - goto fail; - if (index->d_name.len < sizeof(struct ovl_fh)*2) goto fail; @@ -473,21 +510,45 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) if (ovl_is_whiteout(index)) goto out; - err = ovl_verify_fh(index, OVL_XATTR_ORIGIN, fh); + /* + * Verifying directory index entries are not stale is expensive, so + * only verify stale dir index if NFS export is enabled. + */ + if (d_is_dir(index) && !ofs->config.nfs_export) + goto out; + + /* + * Directory index entries should have 'upper' xattr pointing to the + * real upper dir. Non-dir index entries are hardlinks to the upper + * real inode. For non-dir index, we can read the copy up origin xattr + * directly from the index dentry, but for dir index we first need to + * decode the upper directory. + */ + upper = ovl_index_upper(ofs, index); + if (IS_ERR_OR_NULL(upper)) { + err = PTR_ERR(upper); + if (!err) + err = -ESTALE; + goto fail; + } + + err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh); + dput(upper); if (err) goto fail; - err = ovl_check_origin_fh(ofs, fh, index, &stack); - if (err) - goto fail; + /* Check if non-dir index is orphan and don't warn before cleaning it */ + if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) { + err = ovl_check_origin_fh(ofs, fh, index, &stack); + if (err) + goto fail; - /* Check if index is orphan and don't warn before cleaning it */ - if (d_inode(index)->i_nlink == 1 && - ovl_get_nlink(origin.dentry, index, 0) == 0) - err = -ENOENT; + if (ovl_get_nlink(origin.dentry, index, 0) == 0) + err = -ENOENT; + } - dput(origin.dentry); out: + dput(origin.dentry); kfree(fh); return err; From 9ee60ce2491166c73a381e5f04dc4c3a147e169d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 1 Nov 2017 10:13:51 +0200 Subject: [PATCH 21/51] ovl: cleanup temp index entries A previous failed attempt to create or whiteout a directory index may leave index entries named '#%x' in the index dir. Cleanup those temp entries on mount instead of failing the mount. In the future, we may drop 'work' dir and use 'index' dir instead. This change is enough for cleaning up copy up leftovers 'from the future', but it is not enough for cleaning up rmdir leftovers 'from the future' (i.e. temp dir containing whiteouts). Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 881caa385a36..7f27ec5999ea 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -467,6 +468,12 @@ static struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) return upper; } +/* Is this a leftover from create/whiteout of directory index entry? */ +static bool ovl_is_temp_index(struct dentry *index) +{ + return index->d_name.name[0] == '#'; +} + /* * Verify that an index entry name matches the origin file handle stored in * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. @@ -484,6 +491,11 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) if (!d_inode(index)) return 0; + /* Cleanup leftover from index create/cleanup attempt */ + err = -ESTALE; + if (ovl_is_temp_index(index)) + goto fail; + err = -EINVAL; if (index->d_name.len < sizeof(struct ovl_fh)*2) goto fail; From 24b33ee104ecd5a4e1e71412f8966199d6a0bf02 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 26 Sep 2017 07:55:26 +0300 Subject: [PATCH 22/51] ovl: create ovl_need_index() helper The helper determines which lower file needs to be indexed on copy up and before nlink changes. For index=on, the helper evaluates to true for lower hardlinks. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 6 +----- fs/overlayfs/overlayfs.h | 1 + fs/overlayfs/util.c | 24 ++++++++++++++++++++---- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 503c92404095..103e62dcb745 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -536,11 +536,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; - bool indexed = false; - - if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) && - c->stat.nlink > 1) - indexed = true; + bool indexed = ovl_need_index(c->dentry); if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed) c->origin = true; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 25794a3a3fe1..d7e65284c13b 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -241,6 +241,7 @@ void ovl_clear_flag(unsigned long flag, struct inode *inode); bool ovl_test_flag(unsigned long flag, struct inode *inode); bool ovl_inuse_trylock(struct dentry *dentry); void ovl_inuse_unlock(struct dentry *dentry); +bool ovl_need_index(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry, bool *locked); void ovl_nlink_end(struct dentry *dentry, bool locked); int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index ae81d878248e..55ab99131a72 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -463,6 +463,23 @@ void ovl_inuse_unlock(struct dentry *dentry) } } +/* + * Does this overlay dentry need to be indexed on copy up? + */ +bool ovl_need_index(struct dentry *dentry) +{ + struct dentry *lower = ovl_dentry_lower(dentry); + + if (!lower || !ovl_indexdir(dentry->d_sb)) + return false; + + /* Index only lower hardlinks on copy up */ + if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1) + return true; + + return false; +} + /* Caller must hold OVL_I(inode)->lock */ static void ovl_cleanup_index(struct dentry *dentry) { @@ -533,11 +550,11 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) /* * With inodes index is enabled, we store the union overlay nlink - * in an xattr on the index inode. When whiting out lower hardlinks + * in an xattr on the index inode. When whiting out an indexed lower, * we need to decrement the overlay persistent nlink, but before the * first copy up, we have no upper index inode to store the xattr. * - * As a workaround, before whiteout/rename over of a lower hardlink, + * As a workaround, before whiteout/rename over an indexed lower, * copy up to create the upper index. Creating the upper index will * initialize the overlay nlink, so it could be dropped if unlink * or rename succeeds. @@ -545,8 +562,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) * TODO: implement metadata only index copy up when called with * ovl_copy_up_flags(dentry, O_PATH). */ - if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) && - d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) { + if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) { err = ovl_copy_up(dentry); if (err) return err; From fbd2d2074bde2e333d9a9eeda5864cd593fbe29c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 22 Nov 2017 00:08:21 +0200 Subject: [PATCH 23/51] ovl: index all non-dir on copy up for NFS export With the NFS export feature enabled, all non-dir are indexed on copy up. The copy up origin inode of an indexed non-dir can be used as a unique identifier of the overlay object. The full index is also used for consistency verfication, like detecting multiple non-hardlink uppers with the same 'origin' on lookup. Directory index on copy up will be implemented by following patch. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/util.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 55ab99131a72..7cb930e367be 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -473,6 +473,10 @@ bool ovl_need_index(struct dentry *dentry) if (!lower || !ovl_indexdir(dentry->d_sb)) return false; + /* Index all files for NFS export and consistency verification */ + if (!d_is_dir(lower) && ovl_index_all(dentry->d_sb)) + return true; + /* Index only lower hardlinks on copy up */ if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1) return true; From 016b720f5558d825bc0a4c6d2bdd6929fbe86536 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 11 Jan 2018 14:01:08 +0200 Subject: [PATCH 24/51] ovl: index directories on copy up for NFS export With the NFS export feature enabled, all dirs are indexed on copy up. Non-dir files are copied up directly to indexdir and then hardlinked to upper dir. Directories are copied up to indexdir, then an index entry is created in indexdir with 'upper' xattr pointing to the copied up dir and then the copied up dir is moved to upper dir. Directory index is also used for consistency verification, like detecting multiple redirected dirs to the same lower dir on lookup. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 122 +++++++++++++++++++++++++++++++++++++++-- fs/overlayfs/util.c | 2 +- 2 files changed, 117 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 103e62dcb745..8ef25d8c3cfe 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -315,6 +315,94 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower, return err; } +/* Store file handle of @upper dir in @index dir entry */ +static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index) +{ + const struct ovl_fh *fh; + int err; + + fh = ovl_encode_fh(upper, true); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0); + + kfree(fh); + return err; +} + +/* + * Create and install index entry. + * + * Caller must hold i_mutex on indexdir. + */ +static int ovl_create_index(struct dentry *dentry, struct dentry *origin, + struct dentry *upper) +{ + struct dentry *indexdir = ovl_indexdir(dentry->d_sb); + struct inode *dir = d_inode(indexdir); + struct dentry *index = NULL; + struct dentry *temp = NULL; + struct qstr name = { }; + int err; + + /* + * For now this is only used for creating index entry for directories, + * because non-dir are copied up directly to index and then hardlinked + * to upper dir. + * + * TODO: implement create index for non-dir, so we can call it when + * encoding file handle for non-dir in case index does not exist. + */ + if (WARN_ON(!d_is_dir(dentry))) + return -EIO; + + /* Directory not expected to be indexed before copy up */ + if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) + return -EIO; + + err = ovl_get_index_name(origin, &name); + if (err) + return err; + + temp = ovl_lookup_temp(indexdir); + if (IS_ERR(temp)) + goto temp_err; + + err = ovl_do_mkdir(dir, temp, S_IFDIR, true); + if (err) + goto out; + + err = ovl_set_upper_fh(upper, temp); + if (err) + goto out_cleanup; + + index = lookup_one_len(name.name, indexdir, name.len); + if (IS_ERR(index)) { + err = PTR_ERR(index); + } else { + err = ovl_do_rename(dir, temp, dir, index, 0); + dput(index); + } + + if (err) + goto out_cleanup; + +out: + dput(temp); + kfree(name.name); + return err; + +temp_err: + err = PTR_ERR(temp); + temp = NULL; + goto out; + +out_cleanup: + ovl_cleanup(dir, temp); + goto out; +} + struct ovl_copy_up_ctx { struct dentry *parent; struct dentry *dentry; @@ -327,6 +415,7 @@ struct ovl_copy_up_ctx { struct dentry *workdir; bool tmpfile; bool origin; + bool indexed; }; static int ovl_link_up(struct ovl_copy_up_ctx *c) @@ -498,6 +587,12 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) if (err) goto out_cleanup; + if (S_ISDIR(c->stat.mode) && c->indexed) { + err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp); + if (err) + goto out_cleanup; + } + if (c->tmpfile) { inode_lock_nested(udir, I_MUTEX_PARENT); err = ovl_install_temp(c, temp, &newdentry); @@ -536,12 +631,26 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; - bool indexed = ovl_need_index(c->dentry); + bool to_index = false; - if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed) + /* + * Indexed non-dir is copied up directly to the index entry and then + * hardlinked to upper dir. Indexed dir is copied up to indexdir, + * then index entry is created and then copied up dir installed. + * Copying dir up to indexdir instead of workdir simplifies locking. + */ + if (ovl_need_index(c->dentry)) { + c->indexed = true; + if (S_ISDIR(c->stat.mode)) + c->workdir = ovl_indexdir(c->dentry->d_sb); + else + to_index = true; + } + + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) c->origin = true; - if (indexed) { + if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); err = ovl_get_index_name(c->lowerpath.dentry, &c->destname); if (err) @@ -568,9 +677,10 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) } } - if (indexed) { - if (!err) - ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + if (!err && c->indexed) + ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + + if (to_index) { kfree(c->destname.name); } else if (!err) { struct inode *udir = d_inode(c->destdir); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 7cb930e367be..71ddc4f8864e 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -474,7 +474,7 @@ bool ovl_need_index(struct dentry *dentry) return false; /* Index all files for NFS export and consistency verification */ - if (!d_is_dir(lower) && ovl_index_all(dentry->d_sb)) + if (ovl_index_all(dentry->d_sb)) return true; /* Index only lower hardlinks on copy up */ From 89a17556ce4d113f6e7896e118a14f79a84484e9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 26 Sep 2017 07:40:37 +0300 Subject: [PATCH 25/51] ovl: cleanup dir index when dir nlink drops to zero When non-dir index union nlink drops to zero the non-dir index is cleaned. Do the same for directory type index entries when union directory is removed. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/util.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 71ddc4f8864e..6b11e116f190 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -500,7 +500,7 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; inode = d_inode(upperdentry); - if (inode->i_nlink != 1) { + if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", upperdentry, inode->i_ino, inode->i_nlink); /* @@ -549,7 +549,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) const struct cred *old_cred; int err; - if (!d_inode(dentry) || d_is_dir(dentry)) + if (!d_inode(dentry)) return 0; /* @@ -576,7 +576,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) if (err) return err; - if (!ovl_test_flag(OVL_INDEX, d_inode(dentry))) + if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry))) goto out; old_cred = ovl_override_creds(dentry->d_sb); From e7dd0e71348c1e3bc4b9d767c1ffbcbdee46a726 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 24 Oct 2017 17:38:33 +0300 Subject: [PATCH 26/51] ovl: whiteout index when union nlink drops to zero With NFS export feature enabled, when overlay inode nlink drops to zero, instead of removing the index entry, replace it with a whiteout index entry. This is needed for NFS export in order to prevent future open by handle from opening the lower file directly. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 58 ++++++++++++++++++++++++---------------- fs/overlayfs/overlayfs.h | 2 ++ fs/overlayfs/util.c | 17 +++++++----- 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index a1a7606d4891..839709c7803a 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -63,8 +63,7 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir) } /* caller holds i_mutex on workdir */ -static struct dentry *ovl_whiteout(struct dentry *workdir, - struct dentry *dentry) +static struct dentry *ovl_whiteout(struct dentry *workdir) { int err; struct dentry *whiteout; @@ -83,6 +82,38 @@ static struct dentry *ovl_whiteout(struct dentry *workdir, return whiteout; } +/* Caller must hold i_mutex on both workdir and dir */ +int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, + struct dentry *dentry) +{ + struct inode *wdir = workdir->d_inode; + struct dentry *whiteout; + int err; + int flags = 0; + + whiteout = ovl_whiteout(workdir); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + return err; + + if (d_is_dir(dentry)) + flags = RENAME_EXCHANGE; + + err = ovl_do_rename(wdir, whiteout, dir, dentry, flags); + if (err) + goto kill_whiteout; + if (flags) + ovl_cleanup(wdir, dentry); + +out: + dput(whiteout); + return err; + +kill_whiteout: + ovl_cleanup(wdir, whiteout); + goto out; +} + int ovl_create_real(struct inode *dir, struct dentry *newdentry, struct cattr *attr, struct dentry *hardlink, bool debug) { @@ -591,14 +622,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, struct list_head *list) { struct dentry *workdir = ovl_workdir(dentry); - struct inode *wdir = workdir->d_inode; struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); - struct inode *udir = upperdir->d_inode; - struct dentry *whiteout; struct dentry *upper; struct dentry *opaquedir = NULL; int err; - int flags = 0; if (WARN_ON(!workdir)) return -EROFS; @@ -627,24 +654,13 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, goto out_dput_upper; } - whiteout = ovl_whiteout(workdir, dentry); - err = PTR_ERR(whiteout); - if (IS_ERR(whiteout)) - goto out_dput_upper; - - if (d_is_dir(upper)) - flags = RENAME_EXCHANGE; - - err = ovl_do_rename(wdir, whiteout, udir, upper, flags); + err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper); if (err) - goto kill_whiteout; - if (flags) - ovl_cleanup(wdir, upper); + goto out_d_drop; ovl_dentry_version_inc(dentry->d_parent, true); out_d_drop: d_drop(dentry); - dput(whiteout); out_dput_upper: dput(upper); out_unlock: @@ -653,10 +669,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, dput(opaquedir); out: return err; - -kill_whiteout: - ovl_cleanup(wdir, whiteout); - goto out_d_drop; } static int ovl_remove_upper(struct dentry *dentry, bool is_dir, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d7e65284c13b..8f4313c6693b 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -321,6 +321,8 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; struct dentry *ovl_lookup_temp(struct dentry *workdir); +int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, + struct dentry *dentry); struct cattr { dev_t rdev; umode_t mode; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 6b11e116f190..aa2234d52007 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -487,7 +487,8 @@ bool ovl_need_index(struct dentry *dentry) /* Caller must hold OVL_I(inode)->lock */ static void ovl_cleanup_index(struct dentry *dentry) { - struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode; + struct dentry *indexdir = ovl_indexdir(dentry->d_sb); + struct inode *dir = indexdir->d_inode; struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *index = NULL; @@ -518,13 +519,17 @@ static void ovl_cleanup_index(struct dentry *dentry) } inode_lock_nested(dir, I_MUTEX_PARENT); - /* TODO: whiteout instead of cleanup to block future open by handle */ - index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len); + index = lookup_one_len(name.name, indexdir, name.len); err = PTR_ERR(index); - if (!IS_ERR(index)) - err = ovl_cleanup(dir, index); - else + if (IS_ERR(index)) { index = NULL; + } else if (ovl_index_all(dentry->d_sb)) { + /* Whiteout orphan index to block future open by handle */ + err = ovl_cleanup_and_whiteout(indexdir, dir, index); + } else { + /* Cleanup orphan index entries */ + err = ovl_cleanup(dir, index); + } inode_unlock(dir); if (err) From 24f0b17203691d22815e842051a014e3bde7c227 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 11 Jan 2018 15:33:51 +0200 Subject: [PATCH 27/51] ovl: whiteout orphan index entries on mount Orphan index entries are non-dir index entries whose union nlink count dropped to zero. With index=on, orphan index entries are removed on mount. With NFS export feature enabled, orphan index entries are replaced with white out index entries to block future open by handle from opening the lower file. When dir index has a stale 'upper' xattr, we assume that the upper dir was removed and we treat the dir index as orphan entry that needs to be whited out or removed. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 19 +++++++++++++++++-- fs/overlayfs/readdir.c | 25 +++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 7f27ec5999ea..111a64f904c2 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -539,7 +539,15 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) upper = ovl_index_upper(ofs, index); if (IS_ERR_OR_NULL(upper)) { err = PTR_ERR(upper); - if (!err) + /* + * Directory index entries with no 'upper' xattr need to be + * removed. When dir index entry has a stale 'upper' xattr, + * we assume that upper dir was removed and we treat the dir + * index as orphan entry that needs to be whited out. + */ + if (err == -ESTALE) + goto orphan; + else if (!err) err = -ESTALE; goto fail; } @@ -556,7 +564,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) goto fail; if (ovl_get_nlink(origin.dentry, index, 0) == 0) - err = -ENOENT; + goto orphan; } out: @@ -568,6 +576,13 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", index, d_inode(index)->i_mode & S_IFMT, err); goto out; + +orphan: + pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n", + index, d_inode(index)->i_mode & S_IFMT, + d_inode(index)->i_nlink); + err = -ENOENT; + goto out; } /* diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 4c660c7085b7..c11f5c0906c3 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -1067,12 +1067,33 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) break; } err = ovl_verify_index(ofs, index); - /* Cleanup stale and orphan index entries */ - if (err && (err == -ESTALE || err == -ENOENT)) + if (!err) { + goto next; + } else if (err == -ESTALE) { + /* Cleanup stale index entries */ err = ovl_cleanup(dir, index); + } else if (err != -ENOENT) { + /* + * Abort mount to avoid corrupting the index if + * an incompatible index entry was found or on out + * of memory. + */ + break; + } else if (ofs->config.nfs_export) { + /* + * Whiteout orphan index to block future open by + * handle after overlay nlink dropped to zero. + */ + err = ovl_cleanup_and_whiteout(indexdir, dir, index); + } else { + /* Cleanup orphan index entries */ + err = ovl_cleanup(dir, index); + } + if (err) break; +next: dput(index); index = NULL; } From 91ffe7beb31e7e1e689a59f5ef56acea0811d81c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 28 Dec 2017 20:23:05 +0200 Subject: [PATCH 28/51] ovl: factor out ovl_get_index_fh() helper The helper is needed to lookup an index by file handle for NFS export. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 59 +++++++++++++++++++++++++++++++++------- fs/overlayfs/overlayfs.h | 1 + 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 111a64f904c2..49984c9f3689 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -585,6 +585,21 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) goto out; } +static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name) +{ + char *n, *s; + + n = kzalloc(fh->len * 2, GFP_KERNEL); + if (!n) + return -ENOMEM; + + s = bin2hex(n, fh, fh->len); + *name = (struct qstr) QSTR_INIT(n, s - n); + + return 0; + +} + /* * Lookup in indexdir for the index entry of a lower real inode or a copy up * origin inode. The index entry name is the hex representation of the lower @@ -602,25 +617,49 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) */ int ovl_get_index_name(struct dentry *origin, struct qstr *name) { - int err; struct ovl_fh *fh; - char *n, *s; + int err; fh = ovl_encode_fh(origin, false); if (IS_ERR(fh)) return PTR_ERR(fh); - err = -ENOMEM; - n = kzalloc(fh->len * 2, GFP_KERNEL); - if (n) { - s = bin2hex(n, fh, fh->len); - *name = (struct qstr) QSTR_INIT(n, s - n); - err = 0; - } + err = ovl_get_index_name_fh(fh, name); + kfree(fh); - return err; +} +/* Lookup index by file handle for NFS export */ +struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh) +{ + struct dentry *index; + struct qstr name; + int err; + + err = ovl_get_index_name_fh(fh, &name); + if (err) + return ERR_PTR(err); + + index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); + kfree(name.name); + if (IS_ERR(index)) { + if (PTR_ERR(index) == -ENOENT) + index = NULL; + return index; + } + + if (d_is_negative(index)) + err = 0; + else if (ovl_is_whiteout(index)) + err = -ESTALE; + else if (ovl_dentry_weird(index)) + err = -EIO; + else + return index; + + dput(index); + return ERR_PTR(err); } static struct dentry *ovl_lookup_index(struct dentry *dentry, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 8f4313c6693b..4e784f6ff484 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -257,6 +257,7 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name, struct dentry *real, bool is_upper, bool set); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); int ovl_get_index_name(struct dentry *origin, struct qstr *name); +struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); int ovl_path_next(int idx, struct dentry *dentry, struct path *path); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); From 0aceb53e73befee4441c9e68d23cb4f682382171 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 12 Dec 2017 23:43:16 +0200 Subject: [PATCH 29/51] ovl: do not pass overlay dentry to ovl_get_inode() This is needed for using ovl_get_inode() for decoding file handles for NFS export. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 16 +++++++--------- fs/overlayfs/namei.c | 3 ++- fs/overlayfs/overlayfs.h | 5 +++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 96587075db11..f8f7facb7331 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -643,14 +643,14 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, - struct dentry *index) +struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, + struct dentry *lowerdentry, struct dentry *index, + unsigned int numlower) { - struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; /* Already indexed or could be indexed on copy up? */ - bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); + bool indexed = (index || (ovl_indexdir(sb) && !upperdentry)); struct dentry *origin = indexed ? lowerdentry : NULL; bool is_dir; @@ -675,7 +675,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, struct inode *key = d_inode(origin ?: upperdentry); unsigned int nlink = is_dir ? 1 : realinode->i_nlink; - inode = iget5_locked(dentry->d_sb, (unsigned long) key, + inode = iget5_locked(sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key); if (!inode) goto out_nomem; @@ -699,7 +699,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { - inode = new_inode(dentry->d_sb); + inode = new_inode(sb); if (!inode) goto out_nomem; } @@ -711,9 +711,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, /* Check for non-merge dir that may have whiteouts */ if (is_dir) { - struct ovl_entry *oe = dentry->d_fsdata; - - if (((upperdentry && lowerdentry) || oe->numlower > 1) || + if (((upperdentry && lowerdentry) || numlower > 1) || ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { ovl_set_flag(OVL_WHITEOUTS, inode); } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 49984c9f3689..d69ea0a385f6 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -967,7 +967,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, upperdentry = dget(index); if (upperdentry || ctr) { - inode = ovl_get_inode(dentry, upperdentry, index); + inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index, + ctr); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_free_oe; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 4e784f6ff484..f9fce7a680cd 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -307,8 +307,9 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, - struct dentry *index); +struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, + struct dentry *lowerdentry, struct dentry *index, + unsigned int numlower); static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; From 829c28be9bb9a05aa7eeb8a68a4536cca2d3d694 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 29 Sep 2017 21:43:07 +0300 Subject: [PATCH 30/51] ovl: use d_splice_alias() in place of d_add() in lookup This is required for NFS export. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index d69ea0a385f6..9d3ccbd95dde 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -982,9 +982,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, dput(index); kfree(stack); kfree(d.redirect); - d_add(dentry, inode); - - return NULL; + return d_splice_alias(inode, dentry); out_free_oe: dentry->d_fsdata = NULL; From aa3ff3c152ff94ef045ed802db7535167f8a21ab Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 15 Oct 2017 18:00:20 +0300 Subject: [PATCH 31/51] ovl: copy up of disconnected dentries With NFS export, some operations on decoded file handles (e.g. open, link, setattr, xattr_set) may call copy up with a disconnected non-dir. In this case, we will copy up lower inode to index dir without linking it to upper dir. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 56 +++++++++++++++++++++++++++++++----------- fs/overlayfs/inode.c | 4 ++- fs/overlayfs/util.c | 7 +++--- 3 files changed, 48 insertions(+), 19 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 8ef25d8c3cfe..d855f508fa20 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -450,7 +450,10 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) } } inode_unlock(udir); - ovl_set_nlink_upper(c->dentry); + if (err) + return err; + + err = ovl_set_nlink_upper(c->dentry); return err; } @@ -655,6 +658,9 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) err = ovl_get_index_name(c->lowerpath.dentry, &c->destname); if (err) return err; + } else if (WARN_ON(!c->parent)) { + /* Disconnected dentry must be copied up to index dir */ + return -EIO; } else { /* * Mark parent "impure" because it may now contain non-pure @@ -677,12 +683,17 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) } } - if (!err && c->indexed) + + if (err) + goto out; + + if (c->indexed) ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); if (to_index) { - kfree(c->destname.name); - } else if (!err) { + /* Initialize nlink for copy up of disconnected dentry */ + err = ovl_set_nlink_upper(c->dentry); + } else { struct inode *udir = d_inode(c->destdir); /* Restore timestamps on parent (best effort) */ @@ -693,6 +704,9 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); } +out: + if (to_index) + kfree(c->destname.name); return err; } @@ -717,14 +731,17 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, if (err) return err; - ovl_path_upper(parent, &parentpath); - ctx.destdir = parentpath.dentry; - ctx.destname = dentry->d_name; + if (parent) { + ovl_path_upper(parent, &parentpath); + ctx.destdir = parentpath.dentry; + ctx.destname = dentry->d_name; - err = vfs_getattr(&parentpath, &ctx.pstat, - STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); - if (err) - return err; + err = vfs_getattr(&parentpath, &ctx.pstat, + STATX_ATIME | STATX_MTIME, + AT_STATX_SYNC_AS_STAT); + if (err) + return err; + } /* maybe truncate regular file. this has no effect on dirs */ if (flags & O_TRUNC) @@ -745,7 +762,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } else { if (!ovl_dentry_upper(dentry)) err = ovl_do_copy_up(&ctx); - if (!err && !ovl_dentry_has_upper_alias(dentry)) + if (!err && parent && !ovl_dentry_has_upper_alias(dentry)) err = ovl_link_up(&ctx); ovl_copy_up_end(dentry); } @@ -758,10 +775,19 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) { int err = 0; const struct cred *old_cred = ovl_override_creds(dentry->d_sb); + bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED); + + /* + * With NFS export, copy up can get called for a disconnected non-dir. + * In this case, we will copy up lower inode to index dir without + * linking it to upper dir. + */ + if (WARN_ON(disconnected && d_is_dir(dentry))) + return -EIO; while (!err) { struct dentry *next; - struct dentry *parent; + struct dentry *parent = NULL; /* * Check if copy-up has happened as well as for upper alias (in @@ -777,12 +803,12 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) * with rename. */ if (ovl_dentry_upper(dentry) && - ovl_dentry_has_upper_alias(dentry)) + (ovl_dentry_has_upper_alias(dentry) || disconnected)) break; next = dget(dentry); /* find the topmost dentry not yet copied up */ - for (;;) { + for (; !disconnected;) { parent = dget_parent(next); if (ovl_dentry_upper(parent)) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f8f7facb7331..bfd7c766b5cd 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -351,8 +351,10 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type) static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) { + /* Copy up of disconnected dentry does not set upper alias */ if (ovl_dentry_upper(dentry) && - ovl_dentry_has_upper_alias(dentry)) + (ovl_dentry_has_upper_alias(dentry) || + (dentry->d_flags & DCACHE_DISCONNECTED))) return false; if (special_file(d_inode(dentry)->i_mode)) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index aa2234d52007..68541eb5be8e 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -229,9 +229,10 @@ void ovl_dentry_set_opaque(struct dentry *dentry) } /* - * For hard links it's possible for ovl_dentry_upper() to return positive, while - * there's no actual upper alias for the inode. Copy up code needs to know - * about the existence of the upper alias, so it can't use ovl_dentry_upper(). + * For hard links and decoded file handles, it's possible for ovl_dentry_upper() + * to return positive, while there's no actual upper alias for the inode. + * Copy up code needs to know about the existence of the upper alias, so it + * can't use ovl_dentry_upper(). */ bool ovl_dentry_has_upper_alias(struct dentry *dentry) { From c62520a83bceae0bb0b7b3de10c3e81205cd3823 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 14 Jan 2018 19:25:31 +0200 Subject: [PATCH 32/51] ovl: store 'has_upper' and 'opaque' as bit flags We need to make some room in struct ovl_entry to store information about redirected ancestors for NFS export, so cram two booleans as bit flags. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 7 ++++--- fs/overlayfs/overlayfs.h | 10 +++++++++- fs/overlayfs/ovl_entry.h | 8 ++++++-- fs/overlayfs/super.c | 6 +++--- fs/overlayfs/util.c | 30 +++++++++++++++++++----------- 5 files changed, 41 insertions(+), 20 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 9d3ccbd95dde..ca15893cfaa9 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -957,10 +957,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!oe) goto out_put; - oe->opaque = upperopaque; memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr); dentry->d_fsdata = oe; + if (upperopaque) + ovl_dentry_set_opaque(dentry); + if (upperdentry) ovl_dentry_set_upper_alias(dentry); else if (index) @@ -1003,7 +1005,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, bool ovl_lower_positive(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; struct ovl_entry *poe = dentry->d_parent->d_fsdata; const struct qstr *name = &dentry->d_name; const struct cred *old_cred; @@ -1016,7 +1017,7 @@ bool ovl_lower_positive(struct dentry *dentry) * whiteout. */ if (!dentry->d_inode) - return oe->opaque; + return ovl_dentry_is_opaque(dentry); /* Negative upper -> positive lower */ if (!ovl_dentry_upper(dentry)) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f9fce7a680cd..2dddcd257eb3 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -29,7 +29,7 @@ enum ovl_path_type { #define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" #define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper" -enum ovl_flag { +enum ovl_inode_flag { /* Pure upper dir that may contain non pure upper entries */ OVL_IMPURE, /* Non-merge dir that may contain whiteout entries */ @@ -37,6 +37,11 @@ enum ovl_flag { OVL_INDEX, }; +enum ovl_entry_flag { + OVL_E_UPPER_ALIAS, + OVL_E_OPAQUE, +}; + /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: @@ -213,6 +218,9 @@ struct inode *ovl_inode_lower(struct inode *inode); struct inode *ovl_inode_real(struct inode *inode); struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); +void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry); +void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry); +bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 6dd60fcf8cb7..bfef6edcc111 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -61,8 +61,7 @@ struct ovl_fs { struct ovl_entry { union { struct { - unsigned long has_upper; - bool opaque; + unsigned long flags; }; struct rcu_head rcu; }; @@ -72,6 +71,11 @@ struct ovl_entry { struct ovl_entry *ovl_alloc_entry(unsigned int numlower); +static inline struct ovl_entry *OVL_E(struct dentry *dentry) +{ + return (struct ovl_entry *) dentry->d_fsdata; +} + struct ovl_inode { struct ovl_dir_cache *cache; const char *redirect; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 170c184a9f43..fccdcfae68e9 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1345,15 +1345,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!root_dentry) goto out_free_oe; + root_dentry->d_fsdata = oe; + mntput(upperpath.mnt); if (upperpath.dentry) { - oe->has_upper = true; + ovl_dentry_set_upper_alias(root_dentry); if (ovl_is_impuredir(upperpath.dentry)) ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); } - root_dentry->d_fsdata = oe; - /* Root is always merge -> can have whiteouts */ ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); ovl_inode_init(d_inode(root_dentry), upperpath.dentry, diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 68541eb5be8e..930784a26623 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -210,10 +210,24 @@ void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache) OVL_I(inode)->cache = cache; } +void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry) +{ + set_bit(flag, &OVL_E(dentry)->flags); +} + +void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry) +{ + clear_bit(flag, &OVL_E(dentry)->flags); +} + +bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry) +{ + return test_bit(flag, &OVL_E(dentry)->flags); +} + bool ovl_dentry_is_opaque(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - return oe->opaque; + return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry); } bool ovl_dentry_is_whiteout(struct dentry *dentry) @@ -223,9 +237,7 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry) void ovl_dentry_set_opaque(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - oe->opaque = true; + ovl_dentry_set_flag(OVL_E_OPAQUE, dentry); } /* @@ -236,16 +248,12 @@ void ovl_dentry_set_opaque(struct dentry *dentry) */ bool ovl_dentry_has_upper_alias(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->has_upper; + return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry); } void ovl_dentry_set_upper_alias(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - oe->has_upper = true; + ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry); } bool ovl_redirect_dir(struct super_block *sb) From f9c34674bc60e5fc0af6ec6513517ed9182862b9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 19 Jan 2018 11:39:52 +0100 Subject: [PATCH 33/51] vfs: factor out helpers d_instantiate_anon() and d_alloc_anon() Those helpers are going to be used by overlayfs to implement NFS export decode. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/dcache.c | 91 +++++++++++++++++++++++++++--------------- include/linux/dcache.h | 2 + 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index b5d5ea984ac4..99bce0ed0213 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1699,9 +1699,15 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_anon(struct super_block *sb) +{ + return __d_alloc(sb, NULL); +} +EXPORT_SYMBOL(d_alloc_anon); + struct dentry *d_alloc_cursor(struct dentry * parent) { - struct dentry *dentry = __d_alloc(parent->d_sb, NULL); + struct dentry *dentry = d_alloc_anon(parent->d_sb); if (dentry) { dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; dentry->d_parent = dget(parent); @@ -1887,7 +1893,7 @@ struct dentry *d_make_root(struct inode *root_inode) struct dentry *res = NULL; if (root_inode) { - res = __d_alloc(root_inode->i_sb, NULL); + res = d_alloc_anon(root_inode->i_sb); if (res) d_instantiate(res, root_inode); else @@ -1926,11 +1932,54 @@ struct dentry *d_find_any_alias(struct inode *inode) } EXPORT_SYMBOL(d_find_any_alias); -static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) +static struct dentry *__d_instantiate_anon(struct dentry *dentry, + struct inode *inode, + bool disconnected) +{ + struct dentry *res; + unsigned add_flags; + + security_d_instantiate(dentry, inode); + spin_lock(&inode->i_lock); + res = __d_find_any_alias(inode); + if (res) { + spin_unlock(&inode->i_lock); + dput(dentry); + goto out_iput; + } + + /* attach a disconnected dentry */ + add_flags = d_flags_for_inode(inode); + + if (disconnected) + add_flags |= DCACHE_DISCONNECTED; + + spin_lock(&dentry->d_lock); + __d_set_inode_and_type(dentry, inode, add_flags); + hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + hlist_bl_lock(&dentry->d_sb->s_anon); + hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_anon); + hlist_bl_unlock(&dentry->d_sb->s_anon); + spin_unlock(&dentry->d_lock); + spin_unlock(&inode->i_lock); + + return dentry; + + out_iput: + iput(inode); + return res; +} + +struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode) +{ + return __d_instantiate_anon(dentry, inode, true); +} +EXPORT_SYMBOL(d_instantiate_anon); + +static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) { struct dentry *tmp; struct dentry *res; - unsigned add_flags; if (!inode) return ERR_PTR(-ESTALE); @@ -1941,39 +1990,15 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) if (res) goto out_iput; - tmp = __d_alloc(inode->i_sb, NULL); + tmp = d_alloc_anon(inode->i_sb); if (!tmp) { res = ERR_PTR(-ENOMEM); goto out_iput; } - security_d_instantiate(tmp, inode); - spin_lock(&inode->i_lock); - res = __d_find_any_alias(inode); - if (res) { - spin_unlock(&inode->i_lock); - dput(tmp); - goto out_iput; - } + return __d_instantiate_anon(tmp, inode, disconnected); - /* attach a disconnected dentry */ - add_flags = d_flags_for_inode(inode); - - if (disconnected) - add_flags |= DCACHE_DISCONNECTED; - - spin_lock(&tmp->d_lock); - __d_set_inode_and_type(tmp, inode, add_flags); - hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); - hlist_bl_lock(&tmp->d_sb->s_anon); - hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); - hlist_bl_unlock(&tmp->d_sb->s_anon); - spin_unlock(&tmp->d_lock); - spin_unlock(&inode->i_lock); - - return tmp; - - out_iput: +out_iput: iput(inode); return res; } @@ -1998,7 +2023,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) */ struct dentry *d_obtain_alias(struct inode *inode) { - return __d_obtain_alias(inode, 1); + return __d_obtain_alias(inode, true); } EXPORT_SYMBOL(d_obtain_alias); @@ -2019,7 +2044,7 @@ EXPORT_SYMBOL(d_obtain_alias); */ struct dentry *d_obtain_root(struct inode *inode) { - return __d_obtain_alias(inode, 0); + return __d_obtain_alias(inode, false); } EXPORT_SYMBOL(d_obtain_root); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 65cd8ab60b7a..82a99d366aec 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -227,6 +227,7 @@ extern seqlock_t rename_lock; */ extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); +extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); extern int d_instantiate_no_diralias(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); @@ -235,6 +236,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, wait_queue_head_t *); From a01f64b5c06ca1130b0b72ceb5e2a25e4d37ab08 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 25 May 2017 22:39:21 +0300 Subject: [PATCH 34/51] ovl: document NFS export Document NFS export design. Followup patches will implement this design. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.txt | 73 +++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 7a05329844d2..6ea1e64d1464 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -203,10 +203,6 @@ Because lower layer redirects cannot be verified with the index, enabling NFS export support on an overlay filesystem with no upper layer requires turning off redirect follow (e.g. "redirect_dir=nofollow"). -When the NFS export feature is enabled, all directory index entries are -verified on mount time to check that upper file handles are not stale. -This verification may cause significant overhead in some cases. - Non-directories --------------- @@ -334,6 +330,75 @@ found lower directory does not match the stored origin, that directory will not be merged with the upper directory. + +NFS export +---------- + +When the underlying filesystems supports NFS export and the "nfs_export" +feature is enabled, an overlay filesystem may be exported to NFS. + +With the "nfs_export" feature, on copy_up of any lower object, an index +entry is created under the index directory. The index entry name is the +hexadecimal representation of the copy up origin file handle. For a +non-directory object, the index entry is a hard link to the upper inode. +For a directory object, the index entry has an extended attribute +"trusted.overlay.upper" with an encoded file handle of the upper +directory inode. + +When encoding a file handle from an overlay filesystem object, the +following rules apply: + +1. For a non-upper object, encode a lower file handle from lower inode +2. For an indexed object, encode a lower file handle from copy_up origin +3. For a pure-upper object and for an existing non-indexed upper object, + encode an upper file handle from upper inode + +The encoded overlay file handle includes: + - Header including path type information (e.g. lower/upper) + - UUID of the underlying filesystem + - Underlying filesystem encoding of underlying inode + +This encoding format is identical to the encoding format file handles that +are stored in extended attribute "trusted.overlay.origin". + +When decoding an overlay file handle, the following steps are followed: + +1. Find underlying layer by UUID and path type information. +2. Decode the underlying filesystem file handle to underlying dentry. +3. For a lower file handle, lookup the handle in index directory by name. +4. If a whiteout is found in index, return ESTALE. This represents an + overlay object that was deleted after its file handle was encoded. +5. For a non-directory, instantiate a disconnected overlay dentry from the + decoded underlying dentry, the path type and index inode, if found. +6. For a directory, use the connected underlying decoded dentry, path type + and index, to lookup a connected overlay dentry. + +Decoding a non-directory file handle may return a disconnected dentry. +copy_up of that disconnected dentry will create an upper index entry with +no upper alias. + +When overlay filesystem has multiple lower layers, a middle layer +directory may have a "redirect" to lower directory. Because middle layer +"redirects" are not indexed, a lower file handle that was encoded from the +"redirect" origin directory, cannot be used to find the middle or upper +layer directory. Similarly, a lower file handle that was encoded from a +descendant of the "redirect" origin directory, cannot be used to +reconstruct a connected overlay path. To mitigate the cases of +directories that cannot be decoded from a lower file handle, these +directories are copied up on encode and encoded as an upper file handle. +On an overlay filesystem with no upper layer this mitigation cannot be +used NFS export in this setup requires turning off redirect follow (e.g. +"redirect_dir=nofollow"). + +The overlay filesystem does not support non-directory connectable file +handles, so exporting with the 'subtree_check' exportfs configuration will +cause failures to lookup files over NFS. + +When the NFS export feature is enabled, all directory index entries are +verified on mount time to check that upper file handles are not stale. +This verification may cause significant overhead in some cases. + + Testsuite --------- From 8ed5eec9d6c4c013aa657ebefbd10a1a0d15893d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Jul 2017 14:17:16 +0300 Subject: [PATCH 35/51] ovl: encode pure upper file handles Encode overlay file handles as struct ovl_fh containing the file handle encoding of the real upper inode. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/Makefile | 3 +- fs/overlayfs/export.c | 98 ++++++++++++++++++++++++++++++++++++++++ fs/overlayfs/overlayfs.h | 6 +++ 3 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 fs/overlayfs/export.c diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 99373bbc1478..30802347a020 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -4,4 +4,5 @@ obj-$(CONFIG_OVERLAY_FS) += overlay.o -overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o +overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \ + export.o diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c new file mode 100644 index 000000000000..67b907ca9cdc --- /dev/null +++ b/fs/overlayfs/export.c @@ -0,0 +1,98 @@ +/* + * Overlayfs NFS export support. + * + * Amir Goldstein + * + * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "overlayfs.h" + +static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) +{ + struct dentry *upper = ovl_dentry_upper(dentry); + struct dentry *origin = ovl_dentry_lower(dentry); + struct ovl_fh *fh = NULL; + int err; + + /* + * On overlay with an upper layer, overlay root inode is encoded as + * an upper file handle, because upper root dir is not indexed. + */ + if (dentry == dentry->d_sb->s_root && upper) + origin = NULL; + + err = -EACCES; + if (!upper || origin) + goto fail; + + /* TODO: encode non pure-upper by origin */ + fh = ovl_encode_fh(upper, true); + + err = -EOVERFLOW; + if (fh->len > buflen) + goto fail; + + memcpy(buf, (char *)fh, fh->len); + err = fh->len; + +out: + kfree(fh); + return err; + +fail: + pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n", + dentry, err, buflen, fh ? (int)fh->len : 0, + fh ? fh->type : 0); + goto out; +} + +static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len) +{ + int res, len = *max_len << 2; + + res = ovl_d_to_fh(dentry, (char *)fid, len); + if (res <= 0) + return FILEID_INVALID; + + len = res; + + /* Round up to dwords */ + *max_len = (len + 3) >> 2; + return OVL_FILEID; +} + +static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len, + struct inode *parent) +{ + struct dentry *dentry; + int type; + + /* TODO: encode connectable file handles */ + if (parent) + return FILEID_INVALID; + + dentry = d_find_any_alias(inode); + if (WARN_ON(!dentry)) + return FILEID_INVALID; + + type = ovl_dentry_to_fh(dentry, fid, max_len); + + dput(dentry); + return type; +} + +const struct export_operations ovl_export_operations = { + .encode_fh = ovl_encode_inode_fh, +}; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 2dddcd257eb3..f2baa2ccaacd 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -68,6 +68,9 @@ enum ovl_entry_flag { #error Endianness not defined #endif +/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */ +#define OVL_FILEID 0xfb + /* On-disk and in-memeory format for redirect by file handle */ struct ovl_fh { u8 version; /* 0 */ @@ -351,3 +354,6 @@ int ovl_set_attr(struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper); int ovl_set_origin(struct dentry *dentry, struct dentry *lower, struct dentry *upper); + +/* export.c */ +extern const struct export_operations ovl_export_operations; From 8556a4205b111c4dac931ee5eba4fcce74c3cb21 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 19 Jan 2018 01:03:23 +0200 Subject: [PATCH 36/51] ovl: decode pure upper file handles Decoding an upper file handle is done by decoding the upper dentry from underlying upper fs, finding or allocating an overlay inode that is hashed by the real upper inode and instantiating an overlay dentry with that inode. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 97 ++++++++++++++++++++++++++++++++++++++++ fs/overlayfs/namei.c | 4 +- fs/overlayfs/overlayfs.h | 2 + 3 files changed, 101 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 67b907ca9cdc..a7d57bf9c9d8 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -93,6 +93,103 @@ static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len, return type; } +/* + * Find or instantiate an overlay dentry from real dentries. + */ +static struct dentry *ovl_obtain_alias(struct super_block *sb, + struct dentry *upper, + struct ovl_path *lowerpath) +{ + struct inode *inode; + struct dentry *dentry; + struct ovl_entry *oe; + void *fsdata = &oe; + + /* TODO: obtain non pure-upper */ + if (lowerpath) + return ERR_PTR(-EIO); + + inode = ovl_get_inode(sb, dget(upper), NULL, NULL, 0); + if (IS_ERR(inode)) { + dput(upper); + return ERR_CAST(inode); + } + + dentry = d_find_any_alias(inode); + if (!dentry) { + dentry = d_alloc_anon(inode->i_sb); + if (!dentry) + goto nomem; + oe = ovl_alloc_entry(0); + if (!oe) + goto nomem; + + dentry->d_fsdata = oe; + ovl_dentry_set_upper_alias(dentry); + } + + return d_instantiate_anon(dentry, inode); + +nomem: + iput(inode); + dput(dentry); + return ERR_PTR(-ENOMEM); +} + +static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, + struct ovl_fh *fh) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct dentry *dentry; + struct dentry *upper; + + if (!ofs->upper_mnt) + return ERR_PTR(-EACCES); + + upper = ovl_decode_fh(fh, ofs->upper_mnt); + if (IS_ERR_OR_NULL(upper)) + return upper; + + dentry = ovl_obtain_alias(sb, upper, NULL); + dput(upper); + + return dentry; +} + +static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct dentry *dentry = NULL; + struct ovl_fh *fh = (struct ovl_fh *) fid; + int len = fh_len << 2; + unsigned int flags = 0; + int err; + + err = -EINVAL; + if (fh_type != OVL_FILEID) + goto out_err; + + err = ovl_check_fh_len(fh, len); + if (err) + goto out_err; + + /* TODO: decode non-upper */ + flags = fh->flags; + if (flags & OVL_FH_FLAG_PATH_UPPER) + dentry = ovl_upper_fh_to_d(sb, fh); + err = PTR_ERR(dentry); + if (IS_ERR(dentry) && err != -ESTALE) + goto out_err; + + return dentry; + +out_err: + pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n", + len, fh_type, flags, err); + return ERR_PTR(err); +} + const struct export_operations ovl_export_operations = { .encode_fh = ovl_encode_inode_fh, + .fh_to_dentry = ovl_fh_to_dentry, }; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index ca15893cfaa9..a35c5eaa2c01 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -107,7 +107,7 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry) * Return -ENODATA for "origin unknown". * Return <0 for an invalid file handle. */ -static int ovl_check_fh_len(struct ovl_fh *fh, int fh_len) +int ovl_check_fh_len(struct ovl_fh *fh, int fh_len) { if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len) return -EINVAL; @@ -171,7 +171,7 @@ static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name) goto out; } -static struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt) +struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt) { struct dentry *real; int bytes; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f2baa2ccaacd..401113a2e9c7 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -264,6 +264,8 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) /* namei.c */ +int ovl_check_fh_len(struct ovl_fh *fh, int fh_len); +struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt); int ovl_verify_set_fh(struct dentry *dentry, const char *name, struct dentry *real, bool is_upper, bool set); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); From 3985b70a3e3f58109dc6ae347eafe6e8610be41e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 28 Dec 2017 18:36:16 +0200 Subject: [PATCH 37/51] ovl: decode connected upper dir file handles Until this change, we decoded upper file handles by instantiating an overlay dentry from the real upper dentry. This is sufficient to handle pure upper files, but insufficient to handle merge/impure dirs. To that end, if decoded real upper dir is connected and hashed, we lookup an overlay dentry with the same path as the real upper dir. If decoded real upper is non-dir, we instantiate a disconnected overlay dentry as before this change. Because ovl_fh_to_dentry() returns a connected overlay dir dentry, exportfs never needs to call get_parent() and get_name() to reconnect an upper overlay dir. Because connectable non-dir file handles are not supported, exportfs will not be able to use fh_to_parent() and get_name() methods to reconnect a disconnected non-dir to its parent. Therefore, the methods get_parent() and get_name() are implemented just to print out a sanity warning and the method fh_to_parent() is implemented to warn the user that using the 'subtree_check' exportfs option is not supported. An alternative approach could have been to implement instantiating of an overlay directory inode from origin/index and implement get_parent() and get_name() by calling into underlying fs operations and them instantiating the overlay parent dir. The reasons for not choosing the get_parent() approach were: - Obtaining a disconnected overlay dir dentry would requires a delicate re-factoring of ovl_lookup() to get a dentry with overlay parent info. It was preferred to avoid doing that re-factoring unless it was proven worthy. - Going down the path of disconnected dir would mean that the (non trivial) code path of d_splice_alias() could be traveled and that meant writing more tests and introduces race cases that are very hard to hit on purpose. Taking the path of connecting overlay dentry by forward lookup is therefore the safe and boring way to avoid surprises. The culprits of the chosen "connected overlay dentry" approach: - We need to take special care to rename of ancestors while connecting the overlay dentry by real dentry path. These subtleties are usually handled by generic exportfs and VFS code. - In a hypothetical workload, we could end up in a loop trying to connect, interrupted by rename and restarting connect forever. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 231 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 230 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index a7d57bf9c9d8..09fbfa83eeff 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -136,6 +136,204 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, return ERR_PTR(-ENOMEM); } +/* + * Lookup a child overlay dentry to get a connected overlay dentry whose real + * dentry is @real. If @real is on upper layer, we lookup a child overlay + * dentry with the same name as the real dentry. Otherwise, we need to consult + * index for lookup. + */ +static struct dentry *ovl_lookup_real_one(struct dentry *connected, + struct dentry *real, + struct ovl_layer *layer) +{ + struct inode *dir = d_inode(connected); + struct dentry *this, *parent = NULL; + struct name_snapshot name; + int err; + + /* TODO: lookup by lower real dentry */ + if (layer->idx) + return ERR_PTR(-EACCES); + + /* + * Lookup child overlay dentry by real name. The dir mutex protects us + * from racing with overlay rename. If the overlay dentry that is above + * real has already been moved to a parent that is not under the + * connected overlay dir, we return -ECHILD and restart the lookup of + * connected real path from the top. + */ + inode_lock_nested(dir, I_MUTEX_PARENT); + err = -ECHILD; + parent = dget_parent(real); + if (ovl_dentry_upper(connected) != parent) + goto fail; + + /* + * We also need to take a snapshot of real dentry name to protect us + * from racing with underlying layer rename. In this case, we don't + * care about returning ESTALE, only from dereferencing a free name + * pointer because we hold no lock on the real dentry. + */ + take_dentry_name_snapshot(&name, real); + this = lookup_one_len(name.name, connected, strlen(name.name)); + err = PTR_ERR(this); + if (IS_ERR(this)) { + goto fail; + } else if (!this || !this->d_inode) { + dput(this); + err = -ENOENT; + goto fail; + } else if (ovl_dentry_upper(this) != real) { + dput(this); + err = -ESTALE; + goto fail; + } + +out: + release_dentry_name_snapshot(&name); + dput(parent); + inode_unlock(dir); + return this; + +fail: + pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + real, layer->idx, connected, err); + this = ERR_PTR(err); + goto out; +} + +/* + * Lookup a connected overlay dentry whose real dentry is @real. + * If @real is on upper layer, we lookup a child overlay dentry with the same + * path the real dentry. Otherwise, we need to consult index for lookup. + */ +static struct dentry *ovl_lookup_real(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer) +{ + struct dentry *connected; + int err = 0; + + /* TODO: use index when looking up by lower real dentry */ + if (layer->idx) + return ERR_PTR(-EACCES); + + connected = dget(sb->s_root); + while (!err) { + struct dentry *next, *this; + struct dentry *parent = NULL; + struct dentry *real_connected = ovl_dentry_upper(connected); + + if (real_connected == real) + break; + + /* Find the topmost dentry not yet connected */ + next = dget(real); + for (;;) { + parent = dget_parent(next); + + if (parent == real_connected) + break; + + /* + * If real has been moved out of 'real_connected', + * we will not find 'real_connected' and hit the layer + * root. In that case, we need to restart connecting. + * This game can go on forever in the worst case. We + * may want to consider taking s_vfs_rename_mutex if + * this happens more than once. + */ + if (parent == layer->mnt->mnt_root) { + dput(connected); + connected = dget(sb->s_root); + break; + } + + /* + * If real file has been moved out of the layer root + * directory, we will eventully hit the real fs root. + * This cannot happen by legit overlay rename, so we + * return error in that case. + */ + if (parent == next) { + err = -EXDEV; + break; + } + + dput(next); + next = parent; + } + + if (!err) { + this = ovl_lookup_real_one(connected, next, layer); + if (IS_ERR(this)) + err = PTR_ERR(this); + + /* + * Lookup of child in overlay can fail when racing with + * overlay rename of child away from 'connected' parent. + * In this case, we need to restart the lookup from the + * top, because we cannot trust that 'real_connected' is + * still an ancestor of 'real'. + */ + if (err == -ECHILD) { + this = dget(sb->s_root); + err = 0; + } + if (!err) { + dput(connected); + connected = this; + } + } + + dput(parent); + dput(next); + } + + if (err) + goto fail; + + return connected; + +fail: + pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + real, layer->idx, connected, err); + dput(connected); + return ERR_PTR(err); +} + +/* + * Get an overlay dentry from upper/lower real dentries. + */ +static struct dentry *ovl_get_dentry(struct super_block *sb, + struct dentry *upper, + struct ovl_path *lowerpath) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; + + /* TODO: get non-upper dentry */ + if (!upper) + return ERR_PTR(-EACCES); + + /* + * Obtain a disconnected overlay dentry from a non-dir real upper + * dentry. + */ + if (!d_is_dir(upper)) + return ovl_obtain_alias(sb, upper, NULL); + + /* Removed empty directory? */ + if ((upper->d_flags & DCACHE_DISCONNECTED) || d_unhashed(upper)) + return ERR_PTR(-ENOENT); + + /* + * If real upper dentry is connected and hashed, get a connected + * overlay dentry with the same path as the real upper dentry. + */ + return ovl_lookup_real(sb, upper, &upper_layer); +} + static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, struct ovl_fh *fh) { @@ -150,7 +348,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, if (IS_ERR_OR_NULL(upper)) return upper; - dentry = ovl_obtain_alias(sb, upper, NULL); + dentry = ovl_get_dentry(sb, upper, NULL); dput(upper); return dentry; @@ -189,7 +387,38 @@ static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid, return ERR_PTR(err); } +static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n"); + return ERR_PTR(-EACCES); +} + +static int ovl_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + /* + * ovl_fh_to_dentry() returns connected dir overlay dentries and + * ovl_fh_to_parent() is not implemented, so we should not get here. + */ + WARN_ON_ONCE(1); + return -EIO; +} + +static struct dentry *ovl_get_parent(struct dentry *dentry) +{ + /* + * ovl_fh_to_dentry() returns connected dir overlay dentries, so we + * should not get here. + */ + WARN_ON_ONCE(1); + return ERR_PTR(-EIO); +} + const struct export_operations ovl_export_operations = { .encode_fh = ovl_encode_inode_fh, .fh_to_dentry = ovl_fh_to_dentry, + .fh_to_parent = ovl_fh_to_parent, + .get_name = ovl_get_name, + .get_parent = ovl_get_parent, }; From b305e8443f3a87e794927085106db7ebc99a4f74 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 18 Jan 2018 13:14:55 +0200 Subject: [PATCH 38/51] ovl: encode non-indexed upper file handles We only need to encode origin if there is a chance that the same object was encoded pre copy up and then we need to stay consistent with the same encoding also after copy up. In case a non-pure upper is not indexed, then it was copied up before NFS export support was enabled. In that case, we don't need to worry about staying consistent with pre copy up encoding and we encode an upper file handle. This mitigates the problem that with no index, we cannot find an upper inode from origin inode, so we cannot decode a non-indexed upper from origin file handle. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 09fbfa83eeff..862c368883c9 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -19,6 +19,40 @@ #include #include "overlayfs.h" +/* + * We only need to encode origin if there is a chance that the same object was + * encoded pre copy up and then we need to stay consistent with the same + * encoding also after copy up. If non-pure upper is not indexed, then it was + * copied up before NFS export was enabled. In that case we don't need to worry + * about staying consistent with pre copy up encoding and we encode an upper + * file handle. Overlay root dentry is a private case of non-indexed upper. + * + * The following table summarizes the different file handle encodings used for + * different overlay object types: + * + * Object type | Encoding + * -------------------------------- + * Pure upper | U + * Non-indexed upper | U + * Indexed upper | L + * Non-upper | L + * + * U = upper file handle + * L = lower file handle + */ +static bool ovl_should_encode_origin(struct dentry *dentry) +{ + if (!ovl_dentry_lower(dentry)) + return false; + + /* Decoding a non-indexed upper from origin is not implemented */ + if (ovl_dentry_upper(dentry) && + !ovl_test_flag(OVL_INDEX, d_inode(dentry))) + return false; + + return true; +} + static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) { struct dentry *upper = ovl_dentry_upper(dentry); @@ -26,11 +60,7 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) struct ovl_fh *fh = NULL; int err; - /* - * On overlay with an upper layer, overlay root inode is encoded as - * an upper file handle, because upper root dir is not indexed. - */ - if (dentry == dentry->d_sb->s_root && upper) + if (!ovl_should_encode_origin(dentry)) origin = NULL; err = -EACCES; From 05e1f11816d7952ef26cc37fdd6637f834d675a9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 18 Jan 2018 13:15:26 +0200 Subject: [PATCH 39/51] ovl: copy up before encoding non-connectable dir file handle Decoding a merge dir, whose origin's parent is under a redirected lower dir is not always possible. As a simple aproximation, we do not encode lower dir file handles when overlay has multiple lower layers and origin is below the topmost lower layer. We should later relax this condition and copy up only the parent that is under a redirected lower. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 55 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 862c368883c9..9da498ea75db 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -34,17 +34,35 @@ * -------------------------------- * Pure upper | U * Non-indexed upper | U - * Indexed upper | L - * Non-upper | L + * Indexed upper | L (*) + * Non-upper | L (*) * * U = upper file handle * L = lower file handle + * + * (*) Connecting an overlay dir from real lower dentry is not always + * possible when there are redirects in lower layers. To mitigate this case, + * we copy up the lower dir first and then encode an upper dir file handle. */ static bool ovl_should_encode_origin(struct dentry *dentry) { + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + if (!ovl_dentry_lower(dentry)) return false; + /* + * Decoding a merge dir, whose origin's parent is under a redirected + * lower dir is not always possible. As a simple aproximation, we do + * not encode lower dir file handles when overlay has multiple lower + * layers and origin is below the topmost lower layer. + * + * TODO: copy up only the parent that is under redirected lower. + */ + if (d_is_dir(dentry) && ofs->upper_mnt && + OVL_E(dentry)->lowerstack[0].layer->idx > 1) + return false; + /* Decoding a non-indexed upper from origin is not implemented */ if (ovl_dentry_upper(dentry) && !ovl_test_flag(OVL_INDEX, d_inode(dentry))) @@ -53,16 +71,43 @@ static bool ovl_should_encode_origin(struct dentry *dentry) return true; } +static int ovl_encode_maybe_copy_up(struct dentry *dentry) +{ + int err; + + if (ovl_dentry_upper(dentry)) + return 0; + + err = ovl_want_write(dentry); + if (err) + return err; + + err = ovl_copy_up(dentry); + + ovl_drop_write(dentry); + return err; +} + static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) { - struct dentry *upper = ovl_dentry_upper(dentry); + struct dentry *upper; struct dentry *origin = ovl_dentry_lower(dentry); struct ovl_fh *fh = NULL; int err; - if (!ovl_should_encode_origin(dentry)) - origin = NULL; + /* + * If we should not encode a lower dir file handle, copy up and encode + * an upper dir file handle. + */ + if (!ovl_should_encode_origin(dentry)) { + err = ovl_encode_maybe_copy_up(dentry); + if (err) + goto fail; + origin = NULL; + } + + upper = ovl_dentry_upper(dentry); err = -EACCES; if (!upper || origin) goto fail; From 03e1c584ffbcb4ce05c3c61b76aceab4c12d6b68 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 28 Dec 2017 19:35:21 +0200 Subject: [PATCH 40/51] ovl: encode lower file handles For indexed or lower non-dir, encode a non-connectable lower file handle from origin inode. For indexed or lower dir, when ofs->numlower == 1, encode a lower file handle from lower dir. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 9da498ea75db..8e37a07b9eff 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -90,7 +90,6 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry) static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) { - struct dentry *upper; struct dentry *origin = ovl_dentry_lower(dentry); struct ovl_fh *fh = NULL; int err; @@ -107,13 +106,8 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) origin = NULL; } - upper = ovl_dentry_upper(dentry); - err = -EACCES; - if (!upper || origin) - goto fail; - - /* TODO: encode non pure-upper by origin */ - fh = ovl_encode_fh(upper, true); + /* Encode an upper or origin file handle */ + fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin); err = -EOVERFLOW; if (fh->len > buflen) From f941866fc4a8ad0d0b861cc2dbffa06a9f5e8963 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 19 Jan 2018 21:33:44 +0200 Subject: [PATCH 41/51] ovl: decode lower non-dir file handles Decoding a lower non-dir file handle is done by decoding the lower dentry from underlying lower fs, finding or allocating an overlay inode that is hashed by the real lower inode and instantiating an overlay dentry with that inode. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 53 +++++++++++++++++++++++++++++++--------- fs/overlayfs/namei.c | 7 +++--- fs/overlayfs/overlayfs.h | 2 ++ 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 8e37a07b9eff..8c0172d9b922 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -169,16 +169,16 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, struct dentry *upper, struct ovl_path *lowerpath) { - struct inode *inode; + struct dentry *lower = lowerpath ? lowerpath->dentry : NULL; struct dentry *dentry; + struct inode *inode; struct ovl_entry *oe; - void *fsdata = &oe; - /* TODO: obtain non pure-upper */ - if (lowerpath) + /* TODO: obtain an indexed non-dir upper with origin */ + if (lower && (upper || d_is_dir(lower))) return ERR_PTR(-EIO); - inode = ovl_get_inode(sb, dget(upper), NULL, NULL, 0); + inode = ovl_get_inode(sb, dget(upper), lower, NULL, !!lower); if (IS_ERR(inode)) { dput(upper); return ERR_CAST(inode); @@ -189,12 +189,17 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, dentry = d_alloc_anon(inode->i_sb); if (!dentry) goto nomem; - oe = ovl_alloc_entry(0); + oe = ovl_alloc_entry(lower ? 1 : 0); if (!oe) goto nomem; + if (lower) { + oe->lowerstack->dentry = dget(lower); + oe->lowerstack->layer = lowerpath->layer; + } dentry->d_fsdata = oe; - ovl_dentry_set_upper_alias(dentry); + if (upper) + ovl_dentry_set_upper_alias(dentry); } return d_instantiate_anon(dentry, inode); @@ -381,7 +386,14 @@ static struct dentry *ovl_get_dentry(struct super_block *sb, struct ovl_fs *ofs = sb->s_fs_info; struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; - /* TODO: get non-upper dentry */ + /* + * Obtain a disconnected overlay dentry from a disconnected non-dir + * real lower dentry. + */ + if (!upper && !d_is_dir(lowerpath->dentry)) + return ovl_obtain_alias(sb, NULL, lowerpath); + + /* TODO: lookup connected dir from real lower dir */ if (!upper) return ERR_PTR(-EACCES); @@ -423,6 +435,25 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, return dentry; } +static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, + struct ovl_fh *fh) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_path origin = { }; + struct ovl_path *stack = &origin; + struct dentry *dentry = NULL; + int err; + + err = ovl_check_origin_fh(ofs, fh, NULL, &stack); + if (err) + return ERR_PTR(err); + + dentry = ovl_get_dentry(sb, NULL, &origin); + dput(origin.dentry); + + return dentry; +} + static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { @@ -440,10 +471,10 @@ static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid, if (err) goto out_err; - /* TODO: decode non-upper */ flags = fh->flags; - if (flags & OVL_FH_FLAG_PATH_UPPER) - dentry = ovl_upper_fh_to_d(sb, fh); + dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ? + ovl_upper_fh_to_d(sb, fh) : + ovl_lower_fh_to_d(sb, fh); err = PTR_ERR(dentry); if (IS_ERR(dentry) && err != -ESTALE) goto out_err; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index a35c5eaa2c01..741a42d974a3 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -310,9 +310,8 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, } -static int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, - struct dentry *upperdentry, - struct ovl_path **stackp) +int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct dentry *upperdentry, struct ovl_path **stackp) { struct dentry *origin = NULL; int i; @@ -328,7 +327,7 @@ static int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, else if (IS_ERR(origin)) return PTR_ERR(origin); - if (!ovl_is_whiteout(upperdentry) && + if (upperdentry && !ovl_is_whiteout(upperdentry) && ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT)) goto invalid; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 401113a2e9c7..40ba11e412b1 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -266,6 +266,8 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) /* namei.c */ int ovl_check_fh_len(struct ovl_fh *fh, int fh_len); struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt); +int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct dentry *dentry, const char *name, struct dentry *real, bool is_upper, bool set); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); From f71bd9cfb692ec80236b186419bf907eb5fa348c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 19 Jan 2018 21:36:20 +0200 Subject: [PATCH 42/51] ovl: decode indexed non-dir file handles Decoding an indexed non-dir file handle is similar to decoding a lower non-dir file handle, but additionally, we lookup the file handle in index dir by name to find the real upper inode. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 71 ++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 8c0172d9b922..f475a10eec07 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -163,27 +163,32 @@ static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len, } /* - * Find or instantiate an overlay dentry from real dentries. + * Find or instantiate an overlay dentry from real dentries and index. */ static struct dentry *ovl_obtain_alias(struct super_block *sb, - struct dentry *upper, - struct ovl_path *lowerpath) + struct dentry *upper_alias, + struct ovl_path *lowerpath, + struct dentry *index) { struct dentry *lower = lowerpath ? lowerpath->dentry : NULL; + struct dentry *upper = upper_alias ?: index; struct dentry *dentry; struct inode *inode; struct ovl_entry *oe; - /* TODO: obtain an indexed non-dir upper with origin */ - if (lower && (upper || d_is_dir(lower))) + /* We get overlay directory dentries with ovl_lookup_real() */ + if (d_is_dir(upper ?: lower)) return ERR_PTR(-EIO); - inode = ovl_get_inode(sb, dget(upper), lower, NULL, !!lower); + inode = ovl_get_inode(sb, dget(upper), lower, index, !!lower); if (IS_ERR(inode)) { dput(upper); return ERR_CAST(inode); } + if (index) + ovl_set_flag(OVL_INDEX, inode); + dentry = d_find_any_alias(inode); if (!dentry) { dentry = d_alloc_anon(inode->i_sb); @@ -198,7 +203,7 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, oe->lowerstack->layer = lowerpath->layer; } dentry->d_fsdata = oe; - if (upper) + if (upper_alias) ovl_dentry_set_upper_alias(dentry); } @@ -377,33 +382,28 @@ static struct dentry *ovl_lookup_real(struct super_block *sb, } /* - * Get an overlay dentry from upper/lower real dentries. + * Get an overlay dentry from upper/lower real dentries and index. */ static struct dentry *ovl_get_dentry(struct super_block *sb, struct dentry *upper, - struct ovl_path *lowerpath) + struct ovl_path *lowerpath, + struct dentry *index) { struct ovl_fs *ofs = sb->s_fs_info; struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; + struct dentry *real = upper ?: (index ?: lowerpath->dentry); /* - * Obtain a disconnected overlay dentry from a disconnected non-dir - * real lower dentry. + * Obtain a disconnected overlay dentry from a non-dir real dentry + * and index. */ - if (!upper && !d_is_dir(lowerpath->dentry)) - return ovl_obtain_alias(sb, NULL, lowerpath); + if (!d_is_dir(real)) + return ovl_obtain_alias(sb, upper, lowerpath, index); /* TODO: lookup connected dir from real lower dir */ if (!upper) return ERR_PTR(-EACCES); - /* - * Obtain a disconnected overlay dentry from a non-dir real upper - * dentry. - */ - if (!d_is_dir(upper)) - return ovl_obtain_alias(sb, upper, NULL); - /* Removed empty directory? */ if ((upper->d_flags & DCACHE_DISCONNECTED) || d_unhashed(upper)) return ERR_PTR(-ENOENT); @@ -429,7 +429,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, if (IS_ERR_OR_NULL(upper)) return upper; - dentry = ovl_get_dentry(sb, upper, NULL); + dentry = ovl_get_dentry(sb, upper, NULL, NULL); dput(upper); return dentry; @@ -442,16 +442,37 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, struct ovl_path origin = { }; struct ovl_path *stack = &origin; struct dentry *dentry = NULL; + struct dentry *index = NULL; int err; + /* First lookup indexed upper by fh */ + if (ofs->indexdir) { + index = ovl_get_index_fh(ofs, fh); + err = PTR_ERR(index); + if (IS_ERR(index)) + return ERR_PTR(err); + } + + /* Then lookup origin by fh */ err = ovl_check_origin_fh(ofs, fh, NULL, &stack); - if (err) - return ERR_PTR(err); + if (err) { + goto out_err; + } else if (index) { + err = ovl_verify_origin(index, origin.dentry, false); + if (err) + goto out_err; + } - dentry = ovl_get_dentry(sb, NULL, &origin); + dentry = ovl_get_dentry(sb, NULL, &origin, index); + +out: dput(origin.dentry); - + dput(index); return dentry; + +out_err: + dentry = ERR_PTR(err); + goto out; } static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid, From 9436a1a339fae84698aaa0b66d7a822018388348 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 24 Dec 2017 18:28:04 +0200 Subject: [PATCH 43/51] ovl: decode lower file handles of unlinked but open files Lookup overlay inode in cache by origin inode, so we can decode a file handle of an open file even if the index has a whiteout index entry to mark this overlay inode was unlinked. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 23 +++++++++++++++++++++-- fs/overlayfs/inode.c | 16 ++++++++++++++++ fs/overlayfs/overlayfs.h | 1 + 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index f475a10eec07..0bca38c79244 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -443,14 +443,22 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, struct ovl_path *stack = &origin; struct dentry *dentry = NULL; struct dentry *index = NULL; + struct inode *inode = NULL; + bool is_deleted = false; int err; /* First lookup indexed upper by fh */ if (ofs->indexdir) { index = ovl_get_index_fh(ofs, fh); err = PTR_ERR(index); - if (IS_ERR(index)) - return ERR_PTR(err); + if (IS_ERR(index)) { + if (err != -ESTALE) + return ERR_PTR(err); + + /* Found a whiteout index - treat as deleted inode */ + is_deleted = true; + index = NULL; + } } /* Then lookup origin by fh */ @@ -461,6 +469,16 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, err = ovl_verify_origin(index, origin.dentry, false); if (err) goto out_err; + } else if (is_deleted) { + /* Lookup deleted non-dir by origin inode */ + if (!d_is_dir(origin.dentry)) + inode = ovl_lookup_inode(sb, origin.dentry); + err = -ESTALE; + if (!inode || atomic_read(&inode->i_count) == 1) + goto out_err; + + /* Deleted but still open? */ + index = dget(ovl_i_dentry_upper(inode)); } dentry = ovl_get_dentry(sb, NULL, &origin, index); @@ -468,6 +486,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, out: dput(origin.dentry); dput(index); + iput(inode); return dentry; out_err: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index bfd7c766b5cd..56ba015b9f5e 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -645,6 +645,22 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } +struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin) +{ + struct inode *inode, *key = d_inode(origin); + + inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); + if (!inode) + return NULL; + + if (!ovl_verify_inode(inode, origin, NULL)) { + iput(inode); + return ERR_PTR(-ESTALE); + } + + return inode; +} + struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, struct dentry *lowerdentry, struct dentry *index, unsigned int numlower) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 40ba11e412b1..a47f9142b6be 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -322,6 +322,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); +struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin); struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, struct dentry *lowerdentry, struct dentry *index, unsigned int numlower); From 3b0bfc6ed3c434800e5eacfb6cdbe45c07c270e1 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 24 Dec 2017 18:42:16 +0200 Subject: [PATCH 44/51] ovl: decode indexed dir file handles Decoding an indexed dir file handle is done by looking up the file handle in index dir by name and then decoding the upper dir from the index origin file handle. The decoded upper path is used to lookup an overlay dentry of the same path. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 13 +++++++++++++ fs/overlayfs/namei.c | 2 +- fs/overlayfs/overlayfs.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 0bca38c79244..7a4b6a0fd527 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -461,6 +461,19 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, } } + /* Then try to get upper dir by index */ + if (index && d_is_dir(index)) { + struct dentry *upper = ovl_index_upper(ofs, index); + + err = PTR_ERR(upper); + if (IS_ERR_OR_NULL(upper)) + goto out_err; + + dentry = ovl_get_dentry(sb, upper, NULL, NULL); + dput(upper); + goto out; + } + /* Then lookup origin by fh */ err = ovl_check_origin_fh(ofs, fh, NULL, &stack); if (err) { diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 741a42d974a3..6199bf7a77c7 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -439,7 +439,7 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name, } /* Get upper dentry from index */ -static struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) +struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) { struct ovl_fh *fh; struct dentry *upper; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index a47f9142b6be..a5d415aec131 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -270,6 +270,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct dentry *dentry, const char *name, struct dentry *real, bool is_upper, bool set); +struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); int ovl_get_index_name(struct dentry *origin, struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); From 988925164f659bf74061d3036e14873753c937d2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 17 Jan 2018 22:32:44 +0200 Subject: [PATCH 45/51] ovl: decode pure lower dir file handles Similar to decoding a pure upper dir file handle, decoding a pure lower dir file handle is implemented by looking an overlay dentry of the same path as the pure lower path and verifying that the overlay dentry's real lower matches the decoded real lower file handle. Unlike the case of upper dir file handle, the lookup of overlay path by lower real path can fail or find a mismatched overlay dentry if any of the lower parents have been copied up and renamed. To address this case we will need to check if any of the lower parents are indexed. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 7a4b6a0fd527..361174810ce8 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -215,6 +215,23 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, return ERR_PTR(-ENOMEM); } +/* Get the upper or lower dentry in stach whose on layer @idx */ +static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx) +{ + struct ovl_entry *oe = dentry->d_fsdata; + int i; + + if (!idx) + return ovl_dentry_upper(dentry); + + for (i = 0; i < oe->numlower; i++) { + if (oe->lowerstack[i].layer->idx == idx) + return oe->lowerstack[i].dentry; + } + + return NULL; +} + /* * Lookup a child overlay dentry to get a connected overlay dentry whose real * dentry is @real. If @real is on upper layer, we lookup a child overlay @@ -230,10 +247,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, struct name_snapshot name; int err; - /* TODO: lookup by lower real dentry */ - if (layer->idx) - return ERR_PTR(-EACCES); - /* * Lookup child overlay dentry by real name. The dir mutex protects us * from racing with overlay rename. If the overlay dentry that is above @@ -244,7 +257,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, inode_lock_nested(dir, I_MUTEX_PARENT); err = -ECHILD; parent = dget_parent(real); - if (ovl_dentry_upper(connected) != parent) + if (ovl_dentry_real_at(connected, layer->idx) != parent) goto fail; /* @@ -262,7 +275,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, dput(this); err = -ENOENT; goto fail; - } else if (ovl_dentry_upper(this) != real) { + } else if (ovl_dentry_real_at(this, layer->idx) != real) { dput(this); err = -ESTALE; goto fail; @@ -294,14 +307,13 @@ static struct dentry *ovl_lookup_real(struct super_block *sb, int err = 0; /* TODO: use index when looking up by lower real dentry */ - if (layer->idx) - return ERR_PTR(-EACCES); connected = dget(sb->s_root); while (!err) { struct dentry *next, *this; struct dentry *parent = NULL; - struct dentry *real_connected = ovl_dentry_upper(connected); + struct dentry *real_connected = ovl_dentry_real_at(connected, + layer->idx); if (real_connected == real) break; @@ -391,6 +403,7 @@ static struct dentry *ovl_get_dentry(struct super_block *sb, { struct ovl_fs *ofs = sb->s_fs_info; struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; + struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer; struct dentry *real = upper ?: (index ?: lowerpath->dentry); /* @@ -400,19 +413,15 @@ static struct dentry *ovl_get_dentry(struct super_block *sb, if (!d_is_dir(real)) return ovl_obtain_alias(sb, upper, lowerpath, index); - /* TODO: lookup connected dir from real lower dir */ - if (!upper) - return ERR_PTR(-EACCES); - /* Removed empty directory? */ - if ((upper->d_flags & DCACHE_DISCONNECTED) || d_unhashed(upper)) + if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real)) return ERR_PTR(-ENOENT); /* - * If real upper dentry is connected and hashed, get a connected - * overlay dentry with the same path as the real upper dentry. + * If real dentry is connected and hashed, get a connected overlay + * dentry whose real dentry is @real. */ - return ovl_lookup_real(sb, upper, &upper_layer); + return ovl_lookup_real(sb, real, layer); } static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, From 7a9dadef9684aaf738e7ce7e2a9284cc5e165ebc Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 10 Jul 2017 15:55:55 +0300 Subject: [PATCH 46/51] ovl: hash non-indexed dir by upper inode for NFS export Non-indexed upper dirs are encoded as upper file handles. When NFS export is enabled, hash non-indexed directory inodes by upper inode, so we can find them in inode cache using the decoded upper inode. When NFS export is disabled, directories are not indexed on copy up, so hash non-indexed directory inodes by origin inode, the same hash key that is used before copy up. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 56ba015b9f5e..416dc06835db 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -665,6 +665,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, struct dentry *lowerdentry, struct dentry *index, unsigned int numlower) { + struct ovl_fs *ofs = sb->s_fs_info; struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; /* Already indexed or could be indexed on copy up? */ @@ -684,9 +685,10 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, * Hash non-dir that is or could be indexed by origin inode. * Hash dir that is or could be merged by origin inode. * Hash pure upper and non-indexed non-dir by upper inode. + * Hash non-indexed dir by upper inode for NFS export. */ is_dir = S_ISDIR(realinode->i_mode); - if (is_dir) + if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt)) origin = lowerdentry; if (upperdentry || origin) { From 4b91c30a5a19332e8dd10b601d05b72caf657730 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 18 Jan 2018 16:39:13 +0200 Subject: [PATCH 47/51] ovl: lookup connected ancestor of dir in inode cache Decoding a dir file handle requires walking backward up to layer root and for lower dir also checking the index to see if any of the parents have been copied up. Lookup overlay ancestor dentry in inode/dentry cache by decoded real parents to shortcut looking up all the way back to layer root. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 99 +++++++++++++++++++++++++++++++++++++--- fs/overlayfs/inode.c | 21 ++++++--- fs/overlayfs/overlayfs.h | 3 +- 3 files changed, 110 insertions(+), 13 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 361174810ce8..092e6e8c9258 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -294,6 +294,88 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, goto out; } +/* + * Lookup an indexed or hashed overlay dentry by real inode. + */ +static struct dentry *ovl_lookup_real_inode(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer) +{ + struct dentry *this = NULL; + struct inode *inode; + + inode = ovl_lookup_inode(sb, real, !layer->idx); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (inode) { + this = d_find_any_alias(inode); + iput(inode); + } + + /* TODO: use index when looking up by origin inode */ + if (!this) + return NULL; + + if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) { + dput(this); + this = ERR_PTR(-EIO); + } + + return this; +} + +/* + * Lookup an indexed or hashed overlay dentry, whose real dentry is an + * ancestor of @real. + */ +static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer) +{ + struct dentry *next, *parent = NULL; + struct dentry *ancestor = ERR_PTR(-EIO); + + if (real == layer->mnt->mnt_root) + return dget(sb->s_root); + + /* Find the topmost indexed or hashed ancestor */ + next = dget(real); + for (;;) { + parent = dget_parent(next); + + /* + * Lookup a matching overlay dentry in inode/dentry + * cache or in index by real inode. + */ + ancestor = ovl_lookup_real_inode(sb, next, layer); + if (ancestor) + break; + + if (parent == layer->mnt->mnt_root) { + ancestor = dget(sb->s_root); + break; + } + + /* + * If @real has been moved out of the layer root directory, + * we will eventully hit the real fs root. This cannot happen + * by legit overlay rename, so we return error in that case. + */ + if (parent == next) { + ancestor = ERR_PTR(-EXDEV); + break; + } + + dput(next); + next = parent; + } + + dput(parent); + dput(next); + + return ancestor; +} + /* * Lookup a connected overlay dentry whose real dentry is @real. * If @real is on upper layer, we lookup a child overlay dentry with the same @@ -306,9 +388,10 @@ static struct dentry *ovl_lookup_real(struct super_block *sb, struct dentry *connected; int err = 0; - /* TODO: use index when looking up by lower real dentry */ + connected = ovl_lookup_real_ancestor(sb, real, layer); + if (IS_ERR(connected)) + return connected; - connected = dget(sb->s_root); while (!err) { struct dentry *next, *this; struct dentry *parent = NULL; @@ -365,11 +448,15 @@ static struct dentry *ovl_lookup_real(struct super_block *sb, * overlay rename of child away from 'connected' parent. * In this case, we need to restart the lookup from the * top, because we cannot trust that 'real_connected' is - * still an ancestor of 'real'. + * still an ancestor of 'real'. There is a good chance + * that the renamed overlay ancestor is now in cache, so + * ovl_lookup_real_ancestor() will find it and we can + * continue to connect exactly from where lookup failed. */ if (err == -ECHILD) { - this = dget(sb->s_root); - err = 0; + this = ovl_lookup_real_ancestor(sb, real, + layer); + err = IS_ERR(this) ? PTR_ERR(this) : 0; } if (!err) { dput(connected); @@ -494,7 +581,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, } else if (is_deleted) { /* Lookup deleted non-dir by origin inode */ if (!d_is_dir(origin.dentry)) - inode = ovl_lookup_inode(sb, origin.dentry); + inode = ovl_lookup_inode(sb, origin.dentry, false); err = -ESTALE; if (!inode || atomic_read(&inode->i_count) == 1) goto out_err; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 416dc06835db..fcd97b783fa1 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -614,9 +614,15 @@ static int ovl_inode_set(struct inode *inode, void *data) } static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, - struct dentry *upperdentry) + struct dentry *upperdentry, bool strict) { - if (S_ISDIR(inode->i_mode)) { + /* + * For directories, @strict verify from lookup path performs consistency + * checks, so NULL lower/upper in dentry must match NULL lower/upper in + * inode. Non @strict verify from NFS handle decode path passes NULL for + * 'unknown' lower/upper. + */ + if (S_ISDIR(inode->i_mode) && strict) { /* Real lower dir moved to upper layer under us? */ if (!lowerdentry && ovl_inode_lower(inode)) return false; @@ -645,15 +651,17 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } -struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin) +struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, + bool is_upper) { - struct inode *inode, *key = d_inode(origin); + struct inode *inode, *key = d_inode(real); inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); if (!inode) return NULL; - if (!ovl_verify_inode(inode, origin, NULL)) { + if (!ovl_verify_inode(inode, is_upper ? NULL : real, + is_upper ? real : NULL, false)) { iput(inode); return ERR_PTR(-ESTALE); } @@ -704,7 +712,8 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, * Verify that the underlying files stored in the inode * match those in the dentry. */ - if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) { + if (!ovl_verify_inode(inode, lowerdentry, upperdentry, + true)) { iput(inode); inode = ERR_PTR(-ESTALE); goto out; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index a5d415aec131..bf17bf97c50f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -323,7 +323,8 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); -struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin); +struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, + bool is_upper); struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, struct dentry *lowerdentry, struct dentry *index, unsigned int numlower); From 061701540349c30d72e48a201449a840c77ad509 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 17 Jan 2018 14:40:27 +0200 Subject: [PATCH 48/51] ovl: lookup indexed ancestor of lower dir ovl_lookup_real() in lower layer walks back lower parents to find the topmost indexed parent. If an indexed ancestor is found before reaching lower layer root, ovl_lookup_real() is called recursively with upper layer to walk back from indexed upper to the topmost connected/hashed upper parent (or up to root). ovl_lookup_real() in upper layer then walks forward to connect the topmost upper overlay dir dentry and ovl_lookup_real() in lower layer continues to walk forward to connect the decoded lower overlay dir dentry. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 41 +++++++++++++++++++++++++++++++++++++++- fs/overlayfs/namei.c | 20 ++++++++++++++------ fs/overlayfs/overlayfs.h | 2 ++ 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 092e6e8c9258..b65ea49de457 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -294,6 +294,10 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, goto out; } +static struct dentry *ovl_lookup_real(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer); + /* * Lookup an indexed or hashed overlay dentry by real inode. */ @@ -301,9 +305,16 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, struct dentry *real, struct ovl_layer *layer) { + struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; + struct dentry *index = NULL; struct dentry *this = NULL; struct inode *inode; + /* + * Decoding upper dir from index is expensive, so first try to lookup + * overlay dentry in inode/dcache. + */ inode = ovl_lookup_inode(sb, real, !layer->idx); if (IS_ERR(inode)) return ERR_CAST(inode); @@ -312,7 +323,35 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, iput(inode); } - /* TODO: use index when looking up by origin inode */ + /* + * For decoded lower dir file handle, lookup index by origin to check + * if lower dir was copied up and and/or removed. + */ + if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) { + index = ovl_lookup_index(ofs, NULL, real, false); + if (IS_ERR(index)) + return index; + } + + /* Get connected upper overlay dir from index */ + if (index) { + struct dentry *upper = ovl_index_upper(ofs, index); + + dput(index); + if (IS_ERR_OR_NULL(upper)) + return upper; + + /* + * ovl_lookup_real() in lower layer may call recursively once to + * ovl_lookup_real() in upper layer. The first level call walks + * back lower parents to the topmost indexed parent. The second + * recursive call walks back from indexed upper to the topmost + * connected/hashed upper parent (or up to root). + */ + this = ovl_lookup_real(sb, upper, &upper_layer); + dput(upper); + } + if (!this) return NULL; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 6199bf7a77c7..c5449efd96d5 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -661,11 +661,9 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh) return ERR_PTR(err); } -static struct dentry *ovl_lookup_index(struct dentry *dentry, - struct dentry *upper, - struct dentry *origin) +struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, + struct dentry *origin, bool verify) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; struct dentry *index; struct inode *inode; struct qstr name; @@ -693,6 +691,16 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, inode = d_inode(index); if (d_is_negative(index)) { goto out_dput; + } else if (ovl_is_whiteout(index) && !verify) { + /* + * When index lookup is called with !verify for decoding an + * overlay file handle, a whiteout index implies that decode + * should treat file handle as stale and no need to print a + * warning about it. + */ + dput(index); + index = ERR_PTR(-ESTALE); + goto out; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { /* @@ -706,7 +714,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index, d_inode(index)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); goto fail; - } else if (is_dir) { + } else if (is_dir && verify) { if (!upper) { pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n", origin, index); @@ -943,7 +951,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (origin && ovl_indexdir(dentry->d_sb) && (!d.is_dir || ovl_index_all(dentry->d_sb))) { - index = ovl_lookup_index(dentry, upperdentry, origin); + index = ovl_lookup_index(ofs, upperdentry, origin, true); if (IS_ERR(index)) { err = PTR_ERR(index); index = NULL; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index bf17bf97c50f..0df25a9c94bd 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -274,6 +274,8 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); int ovl_get_index_name(struct dentry *origin, struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); +struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, + struct dentry *origin, bool verify); int ovl_path_next(int idx, struct dentry *dentry, struct path *path); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); From 8383f1748829e1a6a07988863ed13a47fb653387 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 2 Oct 2017 11:31:42 +0300 Subject: [PATCH 49/51] ovl: wire up NFS export operations Now that NFS export operations are implemented, enable overlayfs NFS export support if the "nfs_export" feature is enabled. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index fccdcfae68e9..9ee37c76091d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1331,6 +1331,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } } + if (ofs->config.nfs_export) + sb->s_export_op = &ovl_export_operations; + /* Never override disk quota limits or use reserved space */ cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); From 2aed489d163a6559e07dbc238882c9970ae0f65b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 28 Jan 2018 02:35:48 +0200 Subject: [PATCH 50/51] ovl: fix regression in fsnotify of overlay merge dir A re-factoring patch in NFS export series has passed the wrong argument to ovl_get_inode() causing a regression in the very recent fix to fsnotify of overlay merge dir. The regression has caused merge directory inodes to be hashed by upper instead of lower real inode, when NFS export and directory indexing is disabled. That caused an inotify watch to become obsolete after directory copy up and drop caches. LTP test inotify07 was improved to catch this regression. The regression also caused multiple redirect dirs to same origin not to be detected on lookup with NFS export disabled. An xfstest was added to cover this case. Fixes: 0aceb53e73be ("ovl: do not pass overlay dentry to ovl_get_inode()") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index c5449efd96d5..de3e6da1d5a5 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -976,6 +976,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, upperdentry = dget(index); if (upperdentry || ctr) { + if (ctr) + origin = stack[0].dentry; inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index, ctr); err = PTR_ERR(inode); From 9b6faee074702bbbc207e7027b9416c2d8fea9fe Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 30 Jan 2018 13:54:45 +0200 Subject: [PATCH 51/51] ovl: check ERR_PTR() return value from ovl_encode_fh() Another fix for an issue reported by 0-day robot. Reported-by: Dan Carpenter Fixes: 8ed5eec9d6c4 ("ovl: encode pure upper file handles") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index b65ea49de457..bb94ce9da5c8 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -108,6 +108,9 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) /* Encode an upper or origin file handle */ fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin); + err = PTR_ERR(fh); + if (IS_ERR(fh)) + goto fail; err = -EOVERFLOW; if (fh->len > buflen)