From 7508abc4bdac43dc87d2fdd31527063f72da7020 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 18 Dec 2015 11:54:55 -0600 Subject: [PATCH 1/6] GFS2: Check if iopen is held when deleting inode This patch fixes an error condition in which an inode is partially created in gfs2_create_inode() but then some error is discovered, which causes it to fail and call iput() before the iopen glock is created or held. In that case, gfs2_delete_inode would try to unlock an iopen glock that doesn't yet exist. Therefore, we test its holder (which must exist) for the HIF_HOLDER bit before trying to dq it. Signed-off-by: Bob Peterson Acked-by: Steven Whitehouse --- fs/gfs2/glock.c | 1 + fs/gfs2/super.c | 26 ++++++++++++++++---------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a4ff7b56f5cd..5788ebff716a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1015,6 +1015,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) handle_callback(gl, LM_ST_UNLOCKED, 0, false); list_del_init(&gh->gh_list); + clear_bit(HIF_HOLDER, &gh->gh_iflags); if (find_first_holder(gl) == NULL) { if (glops->go_unlock) { GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 8f960a51a9a0..f8a0cd821290 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1551,12 +1551,16 @@ static void gfs2_evict_inode(struct inode *inode) goto out_truncate; } - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_wait(&ip->i_iopen_gh); - gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); - error = gfs2_glock_nq(&ip->i_iopen_gh); - if (error) - goto out_truncate; + if (ip->i_iopen_gh.gh_gl && + test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_wait(&ip->i_iopen_gh); + gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, + &ip->i_iopen_gh); + error = gfs2_glock_nq(&ip->i_iopen_gh); + if (error) + goto out_truncate; + } /* Case 1 starts here */ @@ -1606,11 +1610,13 @@ static void gfs2_evict_inode(struct inode *inode) if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_wait(&ip->i_iopen_gh); + if (ip->i_iopen_gh.gh_gl) { + if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_wait(&ip->i_iopen_gh); + } + gfs2_holder_uninit(&ip->i_iopen_gh); } - gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); if (error && error != GLR_TRYFAILED && error != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", error); From 67893f12e5374bbcaaffbc6e570acbc2714ea884 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Jan 2016 13:08:10 -0500 Subject: [PATCH 2/6] gfs2: avoid uninitialized variable warning We get a bogus warning about a potential uninitialized variable use in gfs2, because the compiler does not figure out that we never use the leaf number if get_leaf_nr() returns an error: fs/gfs2/dir.c: In function 'get_first_leaf': fs/gfs2/dir.c:802:9: warning: 'leaf_no' may be used uninitialized in this function [-Wmaybe-uninitialized] fs/gfs2/dir.c: In function 'dir_split_leaf': fs/gfs2/dir.c:1021:8: warning: 'leaf_no' may be used uninitialized in this function [-Wmaybe-uninitialized] Changing the 'if (!error)' to 'if (!IS_ERR_VALUE(error))' is sufficient to let gcc understand that this is exactly the same condition as in IS_ERR() so it can optimize the code path enough to understand it. Signed-off-by: Arnd Bergmann Signed-off-by: Bob Peterson --- fs/gfs2/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 6a92592304fb..d4014af4f064 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -798,7 +798,7 @@ static int get_first_leaf(struct gfs2_inode *dip, u32 index, int error; error = get_leaf_nr(dip, index, &leaf_no); - if (!error) + if (!IS_ERR_VALUE(error)) error = get_leaf(dip, leaf_no, bh_out); return error; @@ -1014,7 +1014,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) index = name->hash >> (32 - dip->i_depth); error = get_leaf_nr(dip, index, &leaf_no); - if (error) + if (IS_ERR_VALUE(error)) return error; /* Get the old leaf block */ From 2df6f47150b6afbb258ed1d5c9ed78c23df05053 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 27 Jan 2016 16:00:38 -0500 Subject: [PATCH 3/6] GFS2: Fix direct IO write rounding error The fsx test in xfstests was failing because it was using direct IO writes which were using a bad calculation. It was using loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); when it should be loff_t lstart = offset & ~(PAGE_CACHE_SIZE - 1); Thus, the write at offset 0x67e00 was calculating lstart to be 0xe00, the address of our corruption. Instead, it should have been 0x67000. This patch fixes the calculation. Signed-off-by: Bob Peterson Acked-by: Steven Whitehouse --- fs/gfs2/aops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 93f07465e5a6..aa016e4b8bec 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1082,7 +1082,7 @@ static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * the first place, mapping->nr_pages will always be zero. */ if (mapping->nrpages) { - loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); + loff_t lstart = offset & ~(PAGE_CACHE_SIZE - 1); loff_t len = iov_iter_count(iter); loff_t end = PAGE_ALIGN(offset + len) - 1; From a4923865ea071b0bd708339df7a83c76732fa2db Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 7 Dec 2015 16:24:27 -0600 Subject: [PATCH 4/6] GFS2: Prevent delete work from occurring on glocks used for create This patch tries to prevent delete work (queued via iopen callback) from executing if the glock is currently being used to create a new inode. Signed-off-by: Bob Peterson Acked-by: Steven Whitehouse --- fs/gfs2/glock.c | 7 +++++++ fs/gfs2/incore.h | 1 + fs/gfs2/inode.c | 7 ++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5788ebff716a..7f0257309b3e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -572,6 +572,12 @@ static void delete_work_func(struct work_struct *work) struct inode *inode; u64 no_addr = gl->gl_name.ln_number; + /* If someone's using this glock to create a new dinode, the block must + have been freed by another node, then re-used, in which case our + iopen callback is too late after the fact. Ignore it. */ + if (test_bit(GLF_INODE_CREATING, &gl->gl_flags)) + goto out; + ip = gl->gl_object; /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ @@ -583,6 +589,7 @@ static void delete_work_func(struct work_struct *work) d_prune_aliases(inode); iput(inode); } +out: gfs2_glock_put(gl); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 845fb09cc606..a6a3389a07fc 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -328,6 +328,7 @@ enum { GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, + GLF_INODE_CREATING = 16, /* Inode creation occurring */ }; struct gfs2_glock { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 3e94400d587c..95a914524a39 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -592,7 +592,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_glock *io_gl; + struct gfs2_glock *io_gl = NULL; int error, free_vfs_inode = 1; u32 aflags = 0; unsigned blocks = 1; @@ -729,6 +729,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; + BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags)); + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (error) goto fail_gunlock2; @@ -771,12 +773,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, } gfs2_glock_dq_uninit(ghs); gfs2_glock_dq_uninit(ghs + 1); + clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); return error; fail_gunlock3: gfs2_glock_dq_uninit(&ip->i_iopen_gh); gfs2_glock_put(io_gl); fail_gunlock2: + if (io_gl) + clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_dq_uninit(ghs + 1); fail_free_inode: if (ip->i_gl) From ff34245d524a898eee6e013eb1ec165095277148 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 27 Mar 2015 08:25:41 -0500 Subject: [PATCH 5/6] GFS2: Don't filter out I_FREEING inodes anymore This patch basically reverts a very old patch from 2008, 7a9f53b3c1875bef22ad4588e818bc046ef183da, with the title "Alternate gfs2_iget to avoid looking up inodes being freed". The original patch was designed to avoid a deadlock caused by lock ordering with try_rgrp_unlink. The patch forced the function to not find inodes that were being removed by VFS. The problem is, that made it impossible for nodes to delete their own unlinked dinodes after a certain point in time, because the inode needed was not found by this filtering process. There is no longer a need for the patch, since function try_rgrp_unlink no longer locks the inode: All it does is queue the glock onto the delete work_queue, so there should be no more deadlock. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/export.c | 2 +- fs/gfs2/glock.c | 2 +- fs/gfs2/inode.c | 59 ++++-------------------------------------------- fs/gfs2/inode.h | 2 +- 4 files changed, 7 insertions(+), 58 deletions(-) diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 5d15e9498b48..d5bda8513457 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -137,7 +137,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, struct gfs2_sbd *sdp = sb->s_fs_info; struct inode *inode; - inode = gfs2_ilookup(sb, inum->no_addr, 0); + inode = gfs2_ilookup(sb, inum->no_addr); if (inode) { if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { iput(inode); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7f0257309b3e..6539131c52a2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -582,7 +582,7 @@ static void delete_work_func(struct work_struct *work) /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ if (ip) - inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1); + inode = gfs2_ilookup(sdp->sd_vfs, no_addr); else inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); if (inode && !IS_ERR(inode)) { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 95a914524a39..689ddb09e159 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -37,61 +37,9 @@ #include "super.h" #include "glops.h" -struct gfs2_skip_data { - u64 no_addr; - int skipped; - int non_block; -}; - -static int iget_test(struct inode *inode, void *opaque) +struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) { - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_skip_data *data = opaque; - - if (ip->i_no_addr == data->no_addr) { - if (data->non_block && - inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { - data->skipped = 1; - return 0; - } - return 1; - } - return 0; -} - -static int iget_set(struct inode *inode, void *opaque) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_skip_data *data = opaque; - - if (data->skipped) - return -ENOENT; - inode->i_ino = (unsigned long)(data->no_addr); - ip->i_no_addr = data->no_addr; - return 0; -} - -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block) -{ - unsigned long hash = (unsigned long)no_addr; - struct gfs2_skip_data data; - - data.no_addr = no_addr; - data.skipped = 0; - data.non_block = non_block; - return ilookup5(sb, hash, iget_test, &data); -} - -static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr, - int non_block) -{ - struct gfs2_skip_data data; - unsigned long hash = (unsigned long)no_addr; - - data.no_addr = no_addr; - data.skipped = 0; - data.non_block = non_block; - return iget5_locked(sb, hash, iget_test, iget_set, &data); + return ilookup(sb, (unsigned long)no_addr); } /** @@ -145,8 +93,9 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, struct gfs2_glock *io_gl = NULL; int error; - inode = gfs2_iget(sb, no_addr, non_block); + inode = iget_locked(sb, (unsigned long)no_addr); ip = GFS2_I(inode); + ip->i_no_addr = no_addr; if (!inode) return ERR_PTR(-ENOMEM); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index ba4d9492d422..22c27a8498e2 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -99,7 +99,7 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 *no_formal_ino, unsigned int blktype); -extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock); +extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); extern int gfs2_inode_refresh(struct gfs2_inode *ip); From 73b462d2808d7cbca4d7886cf6aaed850640e6cd Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 18 Dec 2015 11:44:49 -0600 Subject: [PATCH 6/6] GFS2: Eliminate parameter non_block on gfs2_inode_lookup Now that we're not filtering out I_FREEING inodes from our lookups anymore, we can eliminate the non_block parameter from the lookup function. Signed-off-by: Bob Peterson Acked-by: Steven Whitehouse --- fs/gfs2/dir.c | 2 +- fs/gfs2/inode.c | 5 ++--- fs/gfs2/inode.h | 3 +-- fs/gfs2/ops_fstype.c | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index d4014af4f064..4a01f30e9995 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1660,7 +1660,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name, brelse(bh); if (fail_on_exist) return ERR_PTR(-EEXIST); - inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0); + inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino); if (!IS_ERR(inode)) GFS2_I(inode)->i_rahead = rahead; return inode; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 689ddb09e159..fa0c781c2522 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -80,13 +80,12 @@ static void gfs2_set_iop(struct inode *inode) * @sb: The super block * @no_addr: The inode number * @type: The type of the inode - * non_block: Can we block on inodes that are being freed? * * Returns: A VFS inode, or an error */ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, - u64 no_addr, u64 no_formal_ino, int non_block) + u64 no_addr, u64 no_formal_ino) { struct inode *inode; struct gfs2_inode *ip; @@ -170,7 +169,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, if (error) goto fail; - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1); + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); if (IS_ERR(inode)) goto fail; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 22c27a8498e2..e1af0d4aa308 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -94,8 +94,7 @@ static inline int gfs2_check_internal_file_size(struct inode *inode, } extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino, - int non_block); + u64 no_addr, u64 no_formal_ino); extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 *no_formal_ino, unsigned int blktype); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index dbed9e243ea2..49b0bff18fe3 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -454,7 +454,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, struct dentry *dentry; struct inode *inode; - inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); + inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); if (IS_ERR(inode)) { fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); return PTR_ERR(inode);