Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs d_inode/d_flags memory ordering fixes from Al Viro: "Fallout from tree-wide audit for ->d_inode/->d_flags barriers use. Basically, the problem is that negative pinned dentries require careful treatment - unless ->d_lock is locked or parent is held at least shared, another thread can make them positive right under us. Most of the uses turned out to be safe - the main surprises as far as filesystems are concerned were - race in dget_parent() fastpath, that might end up with the caller observing the returned dentry _negative_, due to insufficient barriers. It is positive in memory, but we could end up seeing the wrong value of ->d_inode in CPU cache. Fixed. - manual checks that result of lookup_one_len_unlocked() is positive (and rejection of negatives). Again, insufficient barriers (we might end up with inconsistent observed values of ->d_inode and ->d_flags). Fixed by switching to a new primitive that does the checks itself and returns ERR_PTR(-ENOENT) instead of a negative dentry. That way we get rid of boilerplate converting negatives into ERR_PTR(-ENOENT) in the callers and have a single place to deal with the barrier-related mess - inside fs/namei.c rather than in every caller out there. The guts of pathname resolution *do* need to be careful - the race found by Ritesh is real, as well as several similar races. Fortunately, it turns out that we can take care of that with fairly local changes in there. The tree-wide audit had not been fun, and I hate the idea of repeating it. I think the right approach would be to annotate the places where we are _not_ guaranteed ->d_inode/->d_flags stability and have sparse catch regressions. But I'm still not sure what would be the least invasive way of doing that and it's clearly the next cycle fodder" * 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: fs/namei.c: fix missing barriers when checking positivity fix dget_parent() fastpath race new helper: lookup_positive_unlocked() fs/namei.c: pull positivity check into follow_managed()
This commit is contained in:
commit
0aecba6173
|
@ -730,11 +730,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
|
|||
struct inode *dir = d_inode(dentry);
|
||||
struct dentry *child;
|
||||
|
||||
if (!dir) {
|
||||
dput(dentry);
|
||||
dentry = ERR_PTR(-ENOENT);
|
||||
break;
|
||||
}
|
||||
if (!S_ISDIR(dir->i_mode)) {
|
||||
dput(dentry);
|
||||
dentry = ERR_PTR(-ENOTDIR);
|
||||
|
@ -751,7 +746,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
|
|||
while (*s && *s != sep)
|
||||
s++;
|
||||
|
||||
child = lookup_one_len_unlocked(p, dentry, s - p);
|
||||
child = lookup_positive_unlocked(p, dentry, s - p);
|
||||
dput(dentry);
|
||||
dentry = child;
|
||||
} while (!IS_ERR(dentry));
|
||||
|
|
|
@ -319,7 +319,7 @@ static inline void __d_set_inode_and_type(struct dentry *dentry,
|
|||
flags = READ_ONCE(dentry->d_flags);
|
||||
flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
|
||||
flags |= type_flags;
|
||||
WRITE_ONCE(dentry->d_flags, flags);
|
||||
smp_store_release(&dentry->d_flags, flags);
|
||||
}
|
||||
|
||||
static inline void __d_clear_type_and_inode(struct dentry *dentry)
|
||||
|
@ -903,17 +903,19 @@ struct dentry *dget_parent(struct dentry *dentry)
|
|||
{
|
||||
int gotref;
|
||||
struct dentry *ret;
|
||||
unsigned seq;
|
||||
|
||||
/*
|
||||
* Do optimistic parent lookup without any
|
||||
* locking.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
seq = raw_seqcount_begin(&dentry->d_seq);
|
||||
ret = READ_ONCE(dentry->d_parent);
|
||||
gotref = lockref_get_not_zero(&ret->d_lockref);
|
||||
rcu_read_unlock();
|
||||
if (likely(gotref)) {
|
||||
if (likely(ret == READ_ONCE(dentry->d_parent)))
|
||||
if (!read_seqcount_retry(&dentry->d_seq, seq))
|
||||
return ret;
|
||||
dput(ret);
|
||||
}
|
||||
|
|
|
@ -299,13 +299,9 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
|
|||
if (!parent)
|
||||
parent = debugfs_mount->mnt_root;
|
||||
|
||||
dentry = lookup_one_len_unlocked(name, parent, strlen(name));
|
||||
dentry = lookup_positive_unlocked(name, parent, strlen(name));
|
||||
if (IS_ERR(dentry))
|
||||
return NULL;
|
||||
if (!d_really_is_positive(dentry)) {
|
||||
dput(dentry);
|
||||
return NULL;
|
||||
}
|
||||
return dentry;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(debugfs_lookup);
|
||||
|
|
|
@ -223,7 +223,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
|
|||
dput(dentry);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
dtmp = lookup_one_len_unlocked(kntmp->name, dentry,
|
||||
dtmp = lookup_positive_unlocked(kntmp->name, dentry,
|
||||
strlen(kntmp->name));
|
||||
dput(dentry);
|
||||
if (IS_ERR(dtmp))
|
||||
|
|
56
fs/namei.c
56
fs/namei.c
|
@ -1210,25 +1210,25 @@ static int follow_automount(struct path *path, struct nameidata *nd,
|
|||
* - Flagged as automount point
|
||||
*
|
||||
* This may only be called in refwalk mode.
|
||||
* On success path->dentry is known positive.
|
||||
*
|
||||
* Serialization is taken care of in namespace.c
|
||||
*/
|
||||
static int follow_managed(struct path *path, struct nameidata *nd)
|
||||
{
|
||||
struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
|
||||
unsigned managed;
|
||||
unsigned flags;
|
||||
bool need_mntput = false;
|
||||
int ret = 0;
|
||||
|
||||
/* Given that we're not holding a lock here, we retain the value in a
|
||||
* local variable for each dentry as we look at it so that we don't see
|
||||
* the components of that value change under us */
|
||||
while (managed = READ_ONCE(path->dentry->d_flags),
|
||||
managed &= DCACHE_MANAGED_DENTRY,
|
||||
unlikely(managed != 0)) {
|
||||
while (flags = smp_load_acquire(&path->dentry->d_flags),
|
||||
unlikely(flags & DCACHE_MANAGED_DENTRY)) {
|
||||
/* Allow the filesystem to manage the transit without i_mutex
|
||||
* being held. */
|
||||
if (managed & DCACHE_MANAGE_TRANSIT) {
|
||||
if (flags & DCACHE_MANAGE_TRANSIT) {
|
||||
BUG_ON(!path->dentry->d_op);
|
||||
BUG_ON(!path->dentry->d_op->d_manage);
|
||||
ret = path->dentry->d_op->d_manage(path, false);
|
||||
|
@ -1237,7 +1237,7 @@ static int follow_managed(struct path *path, struct nameidata *nd)
|
|||
}
|
||||
|
||||
/* Transit to a mounted filesystem. */
|
||||
if (managed & DCACHE_MOUNTED) {
|
||||
if (flags & DCACHE_MOUNTED) {
|
||||
struct vfsmount *mounted = lookup_mnt(path);
|
||||
if (mounted) {
|
||||
dput(path->dentry);
|
||||
|
@ -1256,7 +1256,7 @@ static int follow_managed(struct path *path, struct nameidata *nd)
|
|||
}
|
||||
|
||||
/* Handle an automount point */
|
||||
if (managed & DCACHE_NEED_AUTOMOUNT) {
|
||||
if (flags & DCACHE_NEED_AUTOMOUNT) {
|
||||
ret = follow_automount(path, nd, &need_mntput);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
@ -1269,10 +1269,12 @@ static int follow_managed(struct path *path, struct nameidata *nd)
|
|||
|
||||
if (need_mntput && path->mnt == mnt)
|
||||
mntput(path->mnt);
|
||||
if (ret == -EISDIR || !ret)
|
||||
ret = 1;
|
||||
if (need_mntput)
|
||||
nd->flags |= LOOKUP_JUMPED;
|
||||
if (ret == -EISDIR || !ret)
|
||||
ret = 1;
|
||||
if (ret > 0 && unlikely(d_flags_negative(flags)))
|
||||
ret = -ENOENT;
|
||||
if (unlikely(ret < 0))
|
||||
path_put_conditional(path, nd);
|
||||
return ret;
|
||||
|
@ -1621,10 +1623,6 @@ static int lookup_fast(struct nameidata *nd,
|
|||
dput(dentry);
|
||||
return status;
|
||||
}
|
||||
if (unlikely(d_is_negative(dentry))) {
|
||||
dput(dentry);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
path->mnt = mnt;
|
||||
path->dentry = dentry;
|
||||
|
@ -1811,11 +1809,6 @@ static int walk_component(struct nameidata *nd, int flags)
|
|||
if (unlikely(err < 0))
|
||||
return err;
|
||||
|
||||
if (unlikely(d_is_negative(path.dentry))) {
|
||||
path_to_nameidata(&path, nd);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
seq = 0; /* we are already out of RCU mode */
|
||||
inode = d_backing_inode(path.dentry);
|
||||
}
|
||||
|
@ -2568,6 +2561,26 @@ struct dentry *lookup_one_len_unlocked(const char *name,
|
|||
}
|
||||
EXPORT_SYMBOL(lookup_one_len_unlocked);
|
||||
|
||||
/*
|
||||
* Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
|
||||
* on negatives. Returns known positive or ERR_PTR(); that's what
|
||||
* most of the users want. Note that pinned negative with unlocked parent
|
||||
* _can_ become positive at any time, so callers of lookup_one_len_unlocked()
|
||||
* need to be very careful; pinned positives have ->d_inode stable, so
|
||||
* this one avoids such problems.
|
||||
*/
|
||||
struct dentry *lookup_positive_unlocked(const char *name,
|
||||
struct dentry *base, int len)
|
||||
{
|
||||
struct dentry *ret = lookup_one_len_unlocked(name, base, len);
|
||||
if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
|
||||
dput(ret);
|
||||
ret = ERR_PTR(-ENOENT);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(lookup_positive_unlocked);
|
||||
|
||||
#ifdef CONFIG_UNIX98_PTYS
|
||||
int path_pts(struct path *path)
|
||||
{
|
||||
|
@ -2662,7 +2675,7 @@ mountpoint_last(struct nameidata *nd)
|
|||
return PTR_ERR(path.dentry);
|
||||
}
|
||||
}
|
||||
if (d_is_negative(path.dentry)) {
|
||||
if (d_flags_negative(smp_load_acquire(&path.dentry->d_flags))) {
|
||||
dput(path.dentry);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
@ -3356,11 +3369,6 @@ static int do_last(struct nameidata *nd,
|
|||
if (unlikely(error < 0))
|
||||
return error;
|
||||
|
||||
if (unlikely(d_is_negative(path.dentry))) {
|
||||
path_to_nameidata(&path, nd);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/*
|
||||
* create/update audit record if it already exists.
|
||||
*/
|
||||
|
|
|
@ -863,13 +863,11 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
|
|||
} else
|
||||
dchild = dget(dparent);
|
||||
} else
|
||||
dchild = lookup_one_len_unlocked(name, dparent, namlen);
|
||||
dchild = lookup_positive_unlocked(name, dparent, namlen);
|
||||
if (IS_ERR(dchild))
|
||||
return rv;
|
||||
if (d_mountpoint(dchild))
|
||||
goto out;
|
||||
if (d_really_is_negative(dchild))
|
||||
goto out;
|
||||
if (dchild->d_inode->i_ino != ino)
|
||||
goto out;
|
||||
rv = fh_compose(fhp, exp, dchild, &cd->fh);
|
||||
|
|
|
@ -2991,18 +2991,9 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
|
|||
__be32 nfserr;
|
||||
int ignore_crossmnt = 0;
|
||||
|
||||
dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
|
||||
dentry = lookup_positive_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
|
||||
if (IS_ERR(dentry))
|
||||
return nfserrno(PTR_ERR(dentry));
|
||||
if (d_really_is_negative(dentry)) {
|
||||
/*
|
||||
* we're not holding the i_mutex here, so there's
|
||||
* a window where this directory entry could have gone
|
||||
* away.
|
||||
*/
|
||||
dput(dentry);
|
||||
return nfserr_noent;
|
||||
}
|
||||
|
||||
exp_get(exp);
|
||||
/*
|
||||
|
|
|
@ -200,7 +200,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
|
|||
int err;
|
||||
bool last_element = !post[0];
|
||||
|
||||
this = lookup_one_len_unlocked(name, base, namelen);
|
||||
this = lookup_positive_unlocked(name, base, namelen);
|
||||
if (IS_ERR(this)) {
|
||||
err = PTR_ERR(this);
|
||||
this = NULL;
|
||||
|
@ -208,8 +208,6 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
|
|||
goto out;
|
||||
goto out_err;
|
||||
}
|
||||
if (!this->d_inode)
|
||||
goto put_and_out;
|
||||
|
||||
if (ovl_dentry_weird(this)) {
|
||||
/* Don't support traversing automounts and other weirdness */
|
||||
|
@ -651,7 +649,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
|
|||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
|
||||
index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
|
||||
kfree(name.name);
|
||||
if (IS_ERR(index)) {
|
||||
if (PTR_ERR(index) == -ENOENT)
|
||||
|
@ -659,9 +657,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
|
|||
return index;
|
||||
}
|
||||
|
||||
if (d_is_negative(index))
|
||||
err = 0;
|
||||
else if (ovl_is_whiteout(index))
|
||||
if (ovl_is_whiteout(index))
|
||||
err = -ESTALE;
|
||||
else if (ovl_dentry_weird(index))
|
||||
err = -EIO;
|
||||
|
@ -685,7 +681,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
|
|||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
|
||||
index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
|
||||
if (IS_ERR(index)) {
|
||||
err = PTR_ERR(index);
|
||||
if (err == -ENOENT) {
|
||||
|
@ -700,9 +696,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
|
|||
}
|
||||
|
||||
inode = d_inode(index);
|
||||
if (d_is_negative(index)) {
|
||||
goto out_dput;
|
||||
} else if (ovl_is_whiteout(index) && !verify) {
|
||||
if (ovl_is_whiteout(index) && !verify) {
|
||||
/*
|
||||
* When index lookup is called with !verify for decoding an
|
||||
* overlay file handle, a whiteout index implies that decode
|
||||
|
@ -1131,7 +1125,7 @@ bool ovl_lower_positive(struct dentry *dentry)
|
|||
struct dentry *this;
|
||||
struct dentry *lowerdir = poe->lowerstack[i].dentry;
|
||||
|
||||
this = lookup_one_len_unlocked(name->name, lowerdir,
|
||||
this = lookup_positive_unlocked(name->name, lowerdir,
|
||||
name->len);
|
||||
if (IS_ERR(this)) {
|
||||
switch (PTR_ERR(this)) {
|
||||
|
@ -1148,10 +1142,8 @@ bool ovl_lower_positive(struct dentry *dentry)
|
|||
break;
|
||||
}
|
||||
} else {
|
||||
if (this->d_inode) {
|
||||
positive = !ovl_is_whiteout(this);
|
||||
done = true;
|
||||
}
|
||||
positive = !ovl_is_whiteout(this);
|
||||
done = true;
|
||||
dput(this);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2487,21 +2487,15 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
|
|||
struct dentry *dentry;
|
||||
int error;
|
||||
|
||||
dentry = lookup_one_len_unlocked(qf_name, sb->s_root, strlen(qf_name));
|
||||
dentry = lookup_positive_unlocked(qf_name, sb->s_root, strlen(qf_name));
|
||||
if (IS_ERR(dentry))
|
||||
return PTR_ERR(dentry);
|
||||
|
||||
if (d_really_is_negative(dentry)) {
|
||||
error = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = security_quota_on(dentry);
|
||||
if (!error)
|
||||
error = dquot_load_quota_inode(d_inode(dentry), type, format_id,
|
||||
DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
|
||||
|
||||
out:
|
||||
dput(dentry);
|
||||
return error;
|
||||
}
|
||||
|
|
|
@ -440,6 +440,11 @@ static inline bool d_is_negative(const struct dentry *dentry)
|
|||
return d_is_miss(dentry);
|
||||
}
|
||||
|
||||
static inline bool d_flags_negative(unsigned flags)
|
||||
{
|
||||
return (flags & DCACHE_ENTRY_TYPE) == DCACHE_MISS_TYPE;
|
||||
}
|
||||
|
||||
static inline bool d_is_positive(const struct dentry *dentry)
|
||||
{
|
||||
return !d_is_negative(dentry);
|
||||
|
|
|
@ -60,6 +60,7 @@ extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
|
|||
extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
|
||||
extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
|
||||
extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
|
||||
extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int);
|
||||
|
||||
extern int follow_down_one(struct path *);
|
||||
extern int follow_down(struct path *);
|
||||
|
|
Loading…
Reference in New Issue