From 76b57302526ae289e8094a51d6a71031ff3d058b Mon Sep 17 00:00:00 2001 From: Peter Watkins Date: Thu, 4 Dec 2014 09:30:51 +1100 Subject: [PATCH 01/10] xfs: overflow in xfs_iomap_eof_align_last_fsb If extsize is set and new_last_fsb is larger than 32 bits, the roundup to extsize will overflow the align variable. Instead, combine alignments by rounding stripe size up to extsize. Signed-off-by: Peter Watkins Reviewed-by: Nathaniel W. Turner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index afcf3c926565..3fad07136c5d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -52,7 +52,6 @@ xfs_iomap_eof_align_last_fsb( xfs_extlen_t extsize, xfs_fileoff_t *last_fsb) { - xfs_fileoff_t new_last_fsb = 0; xfs_extlen_t align = 0; int eof, error; @@ -70,8 +69,8 @@ xfs_iomap_eof_align_last_fsb( else if (mp->m_dalign) align = mp->m_dalign; - if (align && XFS_ISIZE(ip) >= XFS_FSB_TO_B(mp, align)) - new_last_fsb = roundup_64(*last_fsb, align); + if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align)) + align = 0; } /* @@ -79,14 +78,14 @@ xfs_iomap_eof_align_last_fsb( * (when file on a real-time subvolume or has di_extsize hint). */ if (extsize) { - if (new_last_fsb) - align = roundup_64(new_last_fsb, extsize); + if (align) + align = roundup_64(align, extsize); else align = extsize; - new_last_fsb = roundup_64(*last_fsb, align); } - if (new_last_fsb) { + if (align) { + xfs_fileoff_t new_last_fsb = roundup_64(*last_fsb, align); error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); if (error) return error; From 7a1df1561609c14ac457d65d9a4a2b6c0f4204ad Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2014 09:42:21 +1100 Subject: [PATCH 02/10] xfs: fix premature enospc on inode allocation After growing a filesystem, XFS can fail to allocate inodes even though there is a large amount of space available in the filesystem for inodes. The issue is caused by a nearly full allocation group having enough free space in it to be considered for inode allocation, but not enough contiguous free space to actually allocation inodes. This situation results in successful selection of the AG for allocation, then failure of the allocation resulting in ENOSPC being reported to the caller. It is caused by two possible issues. Firstly, we only consider the lognest free extent and whether it would fit an inode chunk. If the extent is not correctly aligned, then we can't allocate an inode chunk in it regardless of the fact that it is large enough. This tends to be a permanent error until space in the AG is freed. The second issue is that we don't actually lock the AGI or AGF when we are doing these checks, and so by the time we get to actually allocating the inode chunk the space we thought we had in the AG may have been allocated. This tends to be a spurious error as it requires a race to trigger. Hence this case is ignored in this patch as the reported problem is for permanent errors. The first issue could be addressed by simply taking into account the alignment when checking the longest extent. This, however, would prevent allocation in AGs that have aligned, exact sized extents free. However, this case should be fairly rare compared to the number of allocations that occur near ENOSPC that would trigger this condition. Hence, when selecting the inode AG, take into account the inode cluster alignment when checking the lognest free extent in the AG. If we can't find any AGs with a contiguous free space large enough to be aligned, drop the alignment addition and just try for an AG that has enough contiguous free space available for an inode chunk. This won't prevent issues from occurring, but should avoid situations where other AGs have lots of free space but the selected AG can't allocate due to alignment constraints. Reported-by: Arkadiusz Miskiewicz Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_ialloc.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 705a7530176e..277a46f96f54 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -48,12 +48,12 @@ */ static inline int xfs_ialloc_cluster_alignment( - xfs_alloc_arg_t *args) + struct xfs_mount *mp) { - if (xfs_sb_version_hasalign(&args->mp->m_sb) && - args->mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size)) - return args->mp->m_sb.sb_inoalignmt; + if (xfs_sb_version_hasalign(&mp->m_sb) && + mp->m_sb.sb_inoalignmt >= + XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) + return mp->m_sb.sb_inoalignmt; return 1; } @@ -412,7 +412,7 @@ xfs_ialloc_ag_alloc( * but not to use them in the actual exact allocation. */ args.alignment = 1; - args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; + args.minalignslop = xfs_ialloc_cluster_alignment(args.mp) - 1; /* Allow space for the inode btree to split. */ args.minleft = args.mp->m_in_maxlevels - 1; @@ -448,7 +448,7 @@ xfs_ialloc_ag_alloc( args.alignment = args.mp->m_dalign; isaligned = 1; } else - args.alignment = xfs_ialloc_cluster_alignment(&args); + args.alignment = xfs_ialloc_cluster_alignment(args.mp); /* * Need to figure out where to allocate the inode blocks. * Ideally they should be spaced out through the a.g. @@ -477,7 +477,7 @@ xfs_ialloc_ag_alloc( args.type = XFS_ALLOCTYPE_NEAR_BNO; args.agbno = be32_to_cpu(agi->agi_root); args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); - args.alignment = xfs_ialloc_cluster_alignment(&args); + args.alignment = xfs_ialloc_cluster_alignment(args.mp); if ((error = xfs_alloc_vextent(&args))) return error; } @@ -632,10 +632,24 @@ xfs_ialloc_ag_select( } /* - * Is there enough free space for the file plus a block of - * inodes? (if we need to allocate some)? + * Check that there is enough free space for the file plus a + * chunk of inodes if we need to allocate some. If this is the + * first pass across the AGs, take into account the potential + * space needed for alignment of inode chunks when checking the + * longest contiguous free space in the AG - this prevents us + * from getting ENOSPC because we have free space larger than + * m_ialloc_blks but alignment constraints prevent us from using + * it. + * + * If we can't find an AG with space for full alignment slack to + * be taken into account, we must be near ENOSPC in all AGs. + * Hence we don't include alignment for the second pass and so + * if we fail allocation due to alignment issues then it is most + * likely a real ENOSPC condition. */ ineed = mp->m_ialloc_blks; + if (flags && ineed > 1) + ineed += xfs_ialloc_cluster_alignment(mp); longest = pag->pagf_longest; if (!longest) longest = pag->pagf_flcount > 0; From b11bd671ba8a0268753db25684115acde57d3d32 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2014 09:42:24 +1100 Subject: [PATCH 03/10] xfs: cleanup xfs_bmse_shift_one goto mess xfs_bmse_shift_one() jumps around determining whether to shift or merge, making the code flow difficult to follow. Clean it up and use direct error returns (including XFS_WANT_CORRUPTED_RETURN) to make the code flow better and be easier to read. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 43 ++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 20d2e96aef6a..0628a678de12 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5544,35 +5544,29 @@ xfs_bmse_shift_one( startoff = got.br_startoff - offset_shift_fsb; /* delalloc extents should be prevented by caller */ - XFS_WANT_CORRUPTED_GOTO(!isnullstartblock(got.br_startblock), - out_error); + XFS_WANT_CORRUPTED_RETURN(!isnullstartblock(got.br_startblock)); /* - * If this is the first extent in the file, make sure there's enough - * room at the start of the file and jump right to the shift as there's - * no left extent to merge. + * Check for merge if we've got an extent to the left, otherwise make + * sure there's enough room at the start of the file for the shift. */ - if (*current_ext == 0) { - if (got.br_startoff < offset_shift_fsb) + if (*current_ext) { + /* grab the left extent and check for a large enough hole */ + leftp = xfs_iext_get_ext(ifp, *current_ext - 1); + xfs_bmbt_get_all(leftp, &left); + + if (startoff < left.br_startoff + left.br_blockcount) return -EINVAL; - goto shift_extent; - } - /* grab the left extent and check for a large enough hole */ - leftp = xfs_iext_get_ext(ifp, *current_ext - 1); - xfs_bmbt_get_all(leftp, &left); - - if (startoff < left.br_startoff + left.br_blockcount) + /* check whether to merge the extent or shift it down */ + if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) { + return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, + *current_ext, gotp, leftp, cur, + logflags); + } + } else if (got.br_startoff < offset_shift_fsb) return -EINVAL; - /* check whether to merge the extent or shift it down */ - if (!xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) - goto shift_extent; - - return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, *current_ext, - gotp, leftp, cur, logflags); - -shift_extent: /* * Increment the extent index for the next iteration, update the start * offset of the in-core extent and update the btree if applicable. @@ -5589,14 +5583,11 @@ xfs_bmse_shift_one( got.br_blockcount, &i); if (error) return error; - XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + XFS_WANT_CORRUPTED_RETURN(i == 1); got.br_startoff = startoff; return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, got.br_blockcount, got.br_state); - -out_error: - return error; } /* From 4db431f57be2d32f35f46023b0c0d9f8b6e06e26 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2014 09:42:40 +1100 Subject: [PATCH 04/10] xfs: cleanup xfs_bmse_merge returns Signed-off-by: Dave Chinner xfs_bmse_merge() has a jump label for return that just returns the error value. Convert all the code to just return the error directly and use XFS_WANT_CORRUPTED_RETURN. This also allows the final call to xfs_bmbt_update() to return directly. Noticed while reviewing coccinelle return cleanup patches and wondering why the same return pattern as in xfs_bmse_shift_one() wasn't picked up by the checker pattern... Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0628a678de12..5a42e2ba857f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5489,32 +5489,25 @@ xfs_bmse_merge( error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, got.br_blockcount, &i); if (error) - goto out_error; - XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); error = xfs_btree_delete(cur, &i); if (error) - goto out_error; - XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); /* lookup and update size of the previous extent */ error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, left.br_blockcount, &i); if (error) - goto out_error; - XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); left.br_blockcount = blockcount; - error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, - left.br_blockcount, left.br_state); - if (error) - goto out_error; - - return 0; - -out_error: - return error; + return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, + left.br_blockcount, left.br_state); } /* From cdc9cec7c0ff521edf8c0e9c9432bf8fdccfc702 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2014 09:42:40 +1100 Subject: [PATCH 05/10] xfs: active inodes stat is broken vn_active only ever gets decremented, so it has a very large negative number. Make it track the inode count we currently have allocated properly so we can easily track the size of the inode cache via tools like PCP. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Dave Chinner --- fs/xfs/xfs_icache.c | 2 ++ fs/xfs/xfs_super.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index b45f7b27b5df..f7a742eb0aaa 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -65,6 +65,7 @@ xfs_inode_alloc( return NULL; } + XFS_STATS_INC(vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); @@ -130,6 +131,7 @@ xfs_inode_free( /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!xfs_isiflocked(ip)); + XFS_STATS_DEC(vn_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index a0f58d933261..45fa5b570eb5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1008,7 +1008,6 @@ xfs_fs_evict_inode( clear_inode(inode); XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); - XFS_STATS_DEC(vn_active); xfs_inactive(ip); } From 2d3d0c53df99587e1d58759f805c3aae79fac453 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2014 09:43:13 +1100 Subject: [PATCH 06/10] xfs: lobotomise xfs_trans_read_buf_map() There's a case in that code where it checks for a buffer match in a transaction where the buffer is not marked done. i.e. trying to catch a buffer we have locked in the transaction but have not completed IO on. The only way we can find a buffer that has not had IO completed on it is if it had readahead issued on it, but we never do readahead on buffers that we have already joined into a transaction. Hence this condition cannot occur, and buffers locked and joined into a transaction should always be marked done and not under IO. Remove this code and re-order xfs_trans_read_buf_map() to remove duplicated IO dispatch and error handling code. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_trans_buf.c | 133 ++++++++++------------------------------- 1 file changed, 32 insertions(+), 101 deletions(-) diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index e2b2216b1635..2f363cdd1e14 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -229,13 +229,6 @@ xfs_trans_getsb(xfs_trans_t *tp, return bp; } -#ifdef DEBUG -xfs_buftarg_t *xfs_error_target; -int xfs_do_error; -int xfs_req_num; -int xfs_error_mod = 33; -#endif - /* * Get and lock the buffer for the caller if it is not already * locked within the given transaction. If it has not yet been @@ -257,46 +250,11 @@ xfs_trans_read_buf_map( struct xfs_buf **bpp, const struct xfs_buf_ops *ops) { - xfs_buf_t *bp; - xfs_buf_log_item_t *bip; + struct xfs_buf *bp = NULL; + struct xfs_buf_log_item *bip; int error; *bpp = NULL; - if (!tp) { - bp = xfs_buf_read_map(target, map, nmaps, flags, ops); - if (!bp) - return (flags & XBF_TRYLOCK) ? - -EAGAIN : -ENOMEM; - - if (bp->b_error) { - error = bp->b_error; - xfs_buf_ioerror_alert(bp, __func__); - XFS_BUF_UNDONE(bp); - xfs_buf_stale(bp); - xfs_buf_relse(bp); - - /* bad CRC means corrupted metadata */ - if (error == -EFSBADCRC) - error = -EFSCORRUPTED; - return error; - } -#ifdef DEBUG - if (xfs_do_error) { - if (xfs_error_target == target) { - if (((xfs_req_num++) % xfs_error_mod) == 0) { - xfs_buf_relse(bp); - xfs_debug(mp, "Returning error!"); - return -EIO; - } - } - } -#endif - if (XFS_FORCED_SHUTDOWN(mp)) - goto shutdown_abort; - *bpp = bp; - return 0; - } - /* * If we find the buffer in the cache with this transaction * pointer in its b_fsprivate2 field, then we know we already @@ -305,49 +263,24 @@ xfs_trans_read_buf_map( * If the buffer is not yet read in, then we read it in, increment * the lock recursion count, and return it to the caller. */ - bp = xfs_trans_buf_item_match(tp, target, map, nmaps); - if (bp != NULL) { + if (tp) + bp = xfs_trans_buf_item_match(tp, target, map, nmaps); + if (bp) { ASSERT(xfs_buf_islocked(bp)); ASSERT(bp->b_transp == tp); ASSERT(bp->b_fspriv != NULL); ASSERT(!bp->b_error); - if (!(XFS_BUF_ISDONE(bp))) { - trace_xfs_trans_read_buf_io(bp, _RET_IP_); - ASSERT(!XFS_BUF_ISASYNC(bp)); - ASSERT(bp->b_iodone == NULL); - XFS_BUF_READ(bp); - bp->b_ops = ops; + ASSERT(bp->b_flags & XBF_DONE); - error = xfs_buf_submit_wait(bp); - if (error) { - if (!XFS_FORCED_SHUTDOWN(mp)) - xfs_buf_ioerror_alert(bp, __func__); - xfs_buf_relse(bp); - /* - * We can gracefully recover from most read - * errors. Ones we can't are those that happen - * after the transaction's already dirty. - */ - if (tp->t_flags & XFS_TRANS_DIRTY) - xfs_force_shutdown(tp->t_mountp, - SHUTDOWN_META_IO_ERROR); - /* bad CRC means corrupted metadata */ - if (error == -EFSBADCRC) - error = -EFSCORRUPTED; - return error; - } - } /* * We never locked this buf ourselves, so we shouldn't * brelse it either. Just get out. */ if (XFS_FORCED_SHUTDOWN(mp)) { trace_xfs_trans_read_buf_shut(bp, _RET_IP_); - *bpp = NULL; return -EIO; } - bip = bp->b_fspriv; bip->bli_recur++; @@ -358,17 +291,29 @@ xfs_trans_read_buf_map( } bp = xfs_buf_read_map(target, map, nmaps, flags, ops); - if (bp == NULL) { - *bpp = NULL; - return (flags & XBF_TRYLOCK) ? - 0 : -ENOMEM; + if (!bp) { + if (!(flags & XBF_TRYLOCK)) + return -ENOMEM; + return tp ? 0 : -EAGAIN; } + + /* + * If we've had a read error, then the contents of the buffer are + * invalid and should not be used. To ensure that a followup read tries + * to pull the buffer from disk again, we clear the XBF_DONE flag and + * mark the buffer stale. This ensures that anyone who has a current + * reference to the buffer will interpret it's contents correctly and + * future cache lookups will also treat it as an empty, uninitialised + * buffer. + */ if (bp->b_error) { error = bp->b_error; + if (!XFS_FORCED_SHUTDOWN(mp)) + xfs_buf_ioerror_alert(bp, __func__); + bp->b_flags &= ~XBF_DONE; xfs_buf_stale(bp); - XFS_BUF_DONE(bp); - xfs_buf_ioerror_alert(bp, __func__); - if (tp->t_flags & XFS_TRANS_DIRTY) + + if (tp && (tp->t_flags & XFS_TRANS_DIRTY)) xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); xfs_buf_relse(bp); @@ -377,33 +322,19 @@ xfs_trans_read_buf_map( error = -EFSCORRUPTED; return error; } -#ifdef DEBUG - if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) { - if (xfs_error_target == target) { - if (((xfs_req_num++) % xfs_error_mod) == 0) { - xfs_force_shutdown(tp->t_mountp, - SHUTDOWN_META_IO_ERROR); - xfs_buf_relse(bp); - xfs_debug(mp, "Returning trans error!"); - return -EIO; - } - } + + if (XFS_FORCED_SHUTDOWN(mp)) { + xfs_buf_relse(bp); + trace_xfs_trans_read_buf_shut(bp, _RET_IP_); + return -EIO; } -#endif - if (XFS_FORCED_SHUTDOWN(mp)) - goto shutdown_abort; - _xfs_trans_bjoin(tp, bp, 1); + if (tp) + _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_read_buf(bp->b_fspriv); - *bpp = bp; return 0; -shutdown_abort: - trace_xfs_trans_read_buf_shut(bp, _RET_IP_); - xfs_buf_relse(bp); - *bpp = NULL; - return -EIO; } /* From 1b767ee386c5f58660ca9be70d3076f2b6484e72 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2014 09:43:17 +1100 Subject: [PATCH 07/10] xfs: move ftype conversion functions to libxfs These functions are needed in userspace for repair and mkfs to do the right thing. Move them to libxfs so they can be easily shared. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_dir2.c | 16 ++++++++++++++++ fs/xfs/libxfs/xfs_dir2.h | 6 ++++++ fs/xfs/libxfs/xfs_dir2_priv.h | 6 ------ fs/xfs/xfs_dir2_readdir.c | 18 +----------------- fs/xfs/xfs_iops.c | 2 +- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 7075aaf131f4..0246877d2d2e 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -38,6 +38,22 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; +/* + * @mode, if set, indicates that the type field needs to be set up. + * This uses the transformation from file mode to DT_* as defined in linux/fs.h + * for file type specification. This will be propagated into the directory + * structure if appropriate for the given operation and filesystem config. + */ +const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { + [0] = XFS_DIR3_FT_UNKNOWN, + [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, + [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, + [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, +}; /* * ASCII case-insensitive (ie. A-Z) support for directories that was diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 4dff261e6ed5..874720dbd865 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -31,6 +31,12 @@ struct xfs_dir2_data_unused; extern struct xfs_name xfs_name_dotdot; +/* + * directory filetype conversion tables. + */ +#define S_SHIFT 12 +extern const unsigned char xfs_mode_to_ftype[]; + /* * directory operations vector for encode/decode routines */ diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 27ce0794d196..27096ba91dde 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -161,12 +161,6 @@ extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const unsigned char *name, int len); -#define S_SHIFT 12 -extern const unsigned char xfs_mode_to_ftype[]; - -extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, - __uint8_t filetype); - /* xfs_dir2_block.c */ extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp, diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index f1b69edcdf31..d4b301a85288 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -44,7 +44,7 @@ static unsigned char xfs_dir3_filetype_table[] = { DT_FIFO, DT_SOCK, DT_LNK, DT_WHT, }; -unsigned char +static unsigned char xfs_dir3_get_dtype( struct xfs_mount *mp, __uint8_t filetype) @@ -57,22 +57,6 @@ xfs_dir3_get_dtype( return xfs_dir3_filetype_table[filetype]; } -/* - * @mode, if set, indicates that the type field needs to be set up. - * This uses the transformation from file mode to DT_* as defined in linux/fs.h - * for file type specification. This will be propagated into the directory - * structure if appropriate for the given operation and filesystem config. - */ -const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { - [0] = XFS_DIR3_FT_UNKNOWN, - [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, - [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, - [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, -}; STATIC int xfs_dir2_sf_getdents( diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ec6dcdc181ee..4b7802aff9ec 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -37,7 +37,7 @@ #include "xfs_icache.h" #include "xfs_symlink.h" #include "xfs_da_btree.h" -#include "xfs_dir2_priv.h" +#include "xfs_dir2.h" #include "xfs_dinode.h" #include "xfs_trans_space.h" From 9a2cc41cda189bea1fb9c41ef64d19949d3b2bcd Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2014 09:43:17 +1100 Subject: [PATCH 08/10] xfs: move type conversion functions to xfs_dir.h These are currently considered private to libxfs, but they are widely used by the userspace code to decode, walk and check directory structures. Hence they really form part of the external API and as such need to bemoved to xfs_dir2.h. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_dir2.h | 134 ++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_dir2_priv.h | 134 ---------------------------------- fs/xfs/xfs_export.c | 1 + fs/xfs/xfs_log_recover.c | 1 + fs/xfs/xfs_mount.c | 1 + 5 files changed, 137 insertions(+), 134 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 874720dbd865..e55353651f5b 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -183,4 +183,138 @@ extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops; extern const struct xfs_buf_ops xfs_dir3_free_buf_ops; extern const struct xfs_buf_ops xfs_dir3_data_buf_ops; +/* + * Directory offset/block conversion functions. + * + * DB blocks here are logical directory block numbers, not filesystem blocks. + */ + +/* + * Convert dataptr to byte in file space + */ +static inline xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) +{ + return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; +} + +/* + * Convert byte in file space to dataptr. It had better be aligned. + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) +{ + return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); +} + +/* + * Convert byte in space to (DB) block + */ +static inline xfs_dir2_db_t +xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return (xfs_dir2_db_t)(by >> geo->blklog); +} + +/* + * Convert dataptr to a block number + */ +static inline xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert byte in space to offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); +} + +/* + * Convert dataptr to a byte offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert block and offset to byte in space + */ +static inline xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return ((xfs_dir2_off_t)db << geo->blklog) + o; +} + +/* + * Convert block (DB) to block (dablk) + */ +static inline xfs_dablk_t +xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); +} + +/* + * Convert byte in space to (DA) block + */ +static inline xfs_dablk_t +xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); +} + +/* + * Convert block and offset to dataptr + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); +} + +/* + * Convert block (dablk) to block (DB) + */ +static inline xfs_dir2_db_t +xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ + return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); +} + +/* + * Convert block (dablk) to byte offset in space + */ +static inline xfs_dir2_off_t +xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ + return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); +} + +/* + * Directory tail pointer accessor functions. Based on block geometry. + */ +static inline struct xfs_dir2_block_tail * +xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) +{ + return ((struct xfs_dir2_block_tail *) + ((char *)hdr + geo->blksize)) - 1; +} + +static inline struct xfs_dir2_leaf_tail * +xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) +{ + return (struct xfs_dir2_leaf_tail *) + ((char *)lp + geo->blksize - + sizeof(struct xfs_dir2_leaf_tail)); +} + #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 27096ba91dde..ef9f6ead96a4 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -20,140 +20,6 @@ struct dir_context; -/* - * Directory offset/block conversion functions. - * - * DB blocks here are logical directory block numbers, not filesystem blocks. - */ - -/* - * Convert dataptr to byte in file space - */ -static inline xfs_dir2_off_t -xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) -{ - return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; -} - -/* - * Convert byte in file space to dataptr. It had better be aligned. - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) -{ - return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); -} - -/* - * Convert byte in space to (DB) block - */ -static inline xfs_dir2_db_t -xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) -{ - return (xfs_dir2_db_t)(by >> geo->blklog); -} - -/* - * Convert dataptr to a block number - */ -static inline xfs_dir2_db_t -xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); -} - -/* - * Convert byte in space to offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) -{ - return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); -} - -/* - * Convert dataptr to a byte offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); -} - -/* - * Convert block and offset to byte in space - */ -static inline xfs_dir2_off_t -xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return ((xfs_dir2_off_t)db << geo->blklog) + o; -} - -/* - * Convert block (DB) to block (dablk) - */ -static inline xfs_dablk_t -xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); -} - -/* - * Convert byte in space to (DA) block - */ -static inline xfs_dablk_t -xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) -{ - return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); -} - -/* - * Convert block and offset to dataptr - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); -} - -/* - * Convert block (dablk) to block (DB) - */ -static inline xfs_dir2_db_t -xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) -{ - return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); -} - -/* - * Convert block (dablk) to byte offset in space - */ -static inline xfs_dir2_off_t -xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) -{ - return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); -} - -/* - * Directory tail pointer accessor functions. Based on block geometry. - */ -static inline struct xfs_dir2_block_tail * -xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) -{ - return ((struct xfs_dir2_block_tail *) - ((char *)hdr + geo->blksize)) - 1; -} - -static inline struct xfs_dir2_leaf_tail * -xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) -{ - return (struct xfs_dir2_leaf_tail *) - ((char *)lp + geo->blksize - - sizeof(struct xfs_dir2_leaf_tail)); -} - /* xfs_dir2.c */ extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 5a6bd5d8779a..ba545634cc04 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -23,6 +23,7 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_export.h" #include "xfs_inode.h" diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 00cd7f3a8f59..f7c312fe91fb 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -27,6 +27,7 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_log.h" diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 13d117089101..acbb4837abc5 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -27,6 +27,7 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_ialloc.h" From 32296f865e8d41ff8c337ce6f0b97eeda08988a3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2014 09:43:17 +1100 Subject: [PATCH 09/10] xfs: fix set-but-unused warnings The kernel compile doesn't turn on these checks by default, so it's only when I do a kernel-user sync that I find that there are lots of compiler warnings waiting to be fixed. Fix up these set-but-unused warnings. Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 2 -- fs/xfs/libxfs/xfs_da_btree.c | 4 ---- fs/xfs/libxfs/xfs_dir2_block.c | 8 -------- fs/xfs/libxfs/xfs_dir2_leaf.c | 10 ---------- fs/xfs/libxfs/xfs_dir2_node.c | 12 ------------ fs/xfs/libxfs/xfs_dir2_sf.c | 10 ---------- fs/xfs/xfs_inode.c | 12 ++---------- fs/xfs/xfs_iomap.c | 2 -- fs/xfs/xfs_itable.c | 2 -- 9 files changed, 2 insertions(+), 60 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 5a42e2ba857f..a0a4db80a4a6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5450,13 +5450,11 @@ xfs_bmse_merge( struct xfs_btree_cur *cur, int *logflags) /* output */ { - struct xfs_ifork *ifp; struct xfs_bmbt_irec got; struct xfs_bmbt_irec left; xfs_filblks_t blockcount; int error, i; - ifp = XFS_IFORK_PTR(ip, whichfork); xfs_bmbt_get_all(gotp, &got); xfs_bmbt_get_all(leftp, &left); blockcount = left.br_blockcount + got.br_blockcount; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index fd827530afec..50cdc32aed1b 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -514,7 +514,6 @@ xfs_da3_root_split( struct xfs_buf *bp; struct xfs_inode *dp; struct xfs_trans *tp; - struct xfs_mount *mp; struct xfs_dir2_leaf *leaf; xfs_dablk_t blkno; int level; @@ -534,7 +533,6 @@ xfs_da3_root_split( dp = args->dp; tp = args->trans; - mp = state->mp; error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); if (error) return error; @@ -2342,14 +2340,12 @@ xfs_da_shrink_inode( xfs_inode_t *dp; int done, error, w, count; xfs_trans_t *tp; - xfs_mount_t *mp; trace_xfs_da_shrink_inode(args); dp = args->dp; w = args->whichfork; tp = args->trans; - mp = dp->i_mount; count = args->geo->fsbcount; for (;;) { /* diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 9628ceccfa02..d8cfc3f2a224 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -353,7 +353,6 @@ xfs_dir2_block_addname( int low; /* low index for binary srch */ int lowstale; /* low stale index */ int mid=0; /* midpoint for binary srch */ - xfs_mount_t *mp; /* filesystem mount point */ int needlog; /* need to log header */ int needscan; /* need to rescan freespace */ __be16 *tagp; /* pointer to tag value */ @@ -363,7 +362,6 @@ xfs_dir2_block_addname( dp = args->dp; tp = args->trans; - mp = dp->i_mount; /* Read the (one and only) directory block into bp. */ error = xfs_dir3_block_read(tp, dp, &bp); @@ -618,7 +616,6 @@ xfs_dir2_block_lookup( xfs_inode_t *dp; /* incore inode */ int ent; /* entry index */ int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ trace_xfs_dir2_block_lookup(args); @@ -629,7 +626,6 @@ xfs_dir2_block_lookup( if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) return error; dp = args->dp; - mp = dp->i_mount; hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); btp = xfs_dir2_block_tail_p(args->geo, hdr); @@ -770,7 +766,6 @@ xfs_dir2_block_removename( xfs_inode_t *dp; /* incore inode */ int ent; /* block leaf entry index */ int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ int needlog; /* need to log block header */ int needscan; /* need to fixup bestfree */ xfs_dir2_sf_hdr_t sfh; /* shortform header */ @@ -788,7 +783,6 @@ xfs_dir2_block_removename( } dp = args->dp; tp = args->trans; - mp = dp->i_mount; hdr = bp->b_addr; btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); @@ -852,7 +846,6 @@ xfs_dir2_block_replace( xfs_inode_t *dp; /* incore inode */ int ent; /* leaf entry index */ int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ trace_xfs_dir2_block_replace(args); @@ -864,7 +857,6 @@ xfs_dir2_block_replace( return error; } dp = args->dp; - mp = dp->i_mount; hdr = bp->b_addr; btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index a19174eb3cb2..e333fa1257c5 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -384,7 +384,6 @@ xfs_dir2_block_to_leaf( xfs_dir2_db_t ldb; /* leaf block's bno */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */ - xfs_mount_t *mp; /* filesystem mount point */ int needlog; /* need to log block header */ int needscan; /* need to rescan bestfree */ xfs_trans_t *tp; /* transaction pointer */ @@ -395,7 +394,6 @@ xfs_dir2_block_to_leaf( trace_xfs_dir2_block_to_leaf(args); dp = args->dp; - mp = dp->i_mount; tp = args->trans; /* * Add the leaf block to the inode. @@ -626,7 +624,6 @@ xfs_dir2_leaf_addname( int lfloghigh; /* high leaf logging index */ int lowstale; /* index of prev stale leaf */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */ - xfs_mount_t *mp; /* filesystem mount point */ int needbytes; /* leaf block bytes needed */ int needlog; /* need to log data header */ int needscan; /* need to rescan data free */ @@ -641,7 +638,6 @@ xfs_dir2_leaf_addname( dp = args->dp; tp = args->trans; - mp = dp->i_mount; error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); if (error) @@ -1356,11 +1352,9 @@ xfs_dir2_leaf_removename( xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - xfs_mount_t *mp; /* filesystem mount point */ int needlog; /* need to log data header */ int needscan; /* need to rescan data frees */ xfs_dir2_data_off_t oldbest; /* old value of best free */ - xfs_trans_t *tp; /* transaction pointer */ struct xfs_dir2_data_free *bf; /* bestfree table */ struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; @@ -1374,8 +1368,6 @@ xfs_dir2_leaf_removename( return error; } dp = args->dp; - tp = args->trans; - mp = dp->i_mount; leaf = lbp->b_addr; hdr = dbp->b_addr; xfs_dir3_data_check(dp, dbp); @@ -1607,11 +1599,9 @@ xfs_dir2_leaf_trim_data( int error; /* error return value */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - xfs_mount_t *mp; /* filesystem mount point */ xfs_trans_t *tp; /* transaction pointer */ dp = args->dp; - mp = dp->i_mount; tp = args->trans; /* * Read the offending data block. We need its buffer. diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 2ae6ac2c11ae..afcb7753b61c 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -297,7 +297,6 @@ xfs_dir2_leaf_to_node( int i; /* leaf freespace index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - xfs_mount_t *mp; /* filesystem mount point */ int n; /* count of live freespc ents */ xfs_dir2_data_off_t off; /* freespace entry value */ __be16 *to; /* pointer to freespace entry */ @@ -307,7 +306,6 @@ xfs_dir2_leaf_to_node( trace_xfs_dir2_leaf_to_node(args); dp = args->dp; - mp = dp->i_mount; tp = args->trans; /* * Add a freespace block to the directory. @@ -387,16 +385,12 @@ xfs_dir2_leafn_add( int lfloghigh; /* high leaf entry logging */ int lfloglow; /* low leaf entry logging */ int lowstale; /* previous stale entry */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_trans_t *tp; /* transaction pointer */ struct xfs_dir3_icleaf_hdr leafhdr; struct xfs_dir2_leaf_entry *ents; trace_xfs_dir2_leafn_add(args, index); dp = args->dp; - mp = dp->i_mount; - tp = args->trans; leaf = bp->b_addr; dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); ents = dp->d_ops->leaf_ents_p(leaf); @@ -1170,7 +1164,6 @@ xfs_dir2_leafn_remove( xfs_dir2_leaf_entry_t *lep; /* leaf entry */ int longest; /* longest data free entry */ int off; /* data block entry offset */ - xfs_mount_t *mp; /* filesystem mount point */ int needlog; /* need to log data header */ int needscan; /* need to rescan data frees */ xfs_trans_t *tp; /* transaction pointer */ @@ -1182,7 +1175,6 @@ xfs_dir2_leafn_remove( dp = args->dp; tp = args->trans; - mp = dp->i_mount; leaf = bp->b_addr; dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); ents = dp->d_ops->leaf_ents_p(leaf); @@ -1323,7 +1315,6 @@ xfs_dir2_leafn_split( xfs_da_args_t *args; /* operation arguments */ xfs_dablk_t blkno; /* new leaf block number */ int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ struct xfs_inode *dp; /* @@ -1331,7 +1322,6 @@ xfs_dir2_leafn_split( */ args = state->args; dp = args->dp; - mp = dp->i_mount; ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC); error = xfs_da_grow_inode(args, &blkno); if (error) { @@ -2231,12 +2221,10 @@ xfs_dir2_node_trim_free( xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ xfs_dir2_free_t *free; /* freespace structure */ - xfs_mount_t *mp; /* filesystem mount point */ xfs_trans_t *tp; /* transaction pointer */ struct xfs_dir3_icfree_hdr freehdr; dp = args->dp; - mp = dp->i_mount; tp = args->trans; /* * Read the freespace block. diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 5079e051ef08..d0863d44d80d 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -455,13 +455,11 @@ xfs_dir2_sf_addname_hard( xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ - struct xfs_mount *mp; /* * Copy the old directory to the stack buffer. */ dp = args->dp; - mp = dp->i_mount; sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; old_isize = (int)dp->i_d.di_size; @@ -542,7 +540,6 @@ xfs_dir2_sf_addname_pick( xfs_inode_t *dp; /* incore directory inode */ int holefit; /* found hole it will fit in */ int i; /* entry number */ - xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_data_aoff_t offset; /* data block offset */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ @@ -550,7 +547,6 @@ xfs_dir2_sf_addname_pick( int used; /* data bytes used */ dp = args->dp; - mp = dp->i_mount; sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; size = dp->d_ops->data_entsize(args->namelen); @@ -616,10 +612,8 @@ xfs_dir2_sf_check( int offset; /* data offset */ xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - struct xfs_mount *mp; dp = args->dp; - mp = dp->i_mount; sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; offset = dp->d_ops->data_first_offset; @@ -1016,12 +1010,10 @@ xfs_dir2_sf_toino4( int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ - struct xfs_mount *mp; trace_xfs_dir2_sf_toino4(args); dp = args->dp; - mp = dp->i_mount; /* * Copy the old directory to the buffer. @@ -1094,12 +1086,10 @@ xfs_dir2_sf_toino8( int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ - struct xfs_mount *mp; trace_xfs_dir2_sf_toino8(args); dp = args->dp; - mp = dp->i_mount; /* * Copy the old directory to the buffer. diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2ffb80267e37..c0b17ec70c99 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2485,9 +2485,7 @@ xfs_remove( xfs_fsblock_t first_block; int cancel_flags; int committed; - int link_zero; uint resblks; - uint log_count; trace_xfs_remove(dp, name); @@ -2502,13 +2500,10 @@ xfs_remove( if (error) goto std_return; - if (is_dir) { + if (is_dir) tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); - log_count = XFS_DEFAULT_LOG_COUNT; - } else { + else tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); - log_count = XFS_REMOVE_LOG_COUNT; - } cancel_flags = XFS_TRANS_RELEASE_LOG_RES; /* @@ -2576,9 +2571,6 @@ xfs_remove( if (error) goto out_trans_cancel; - /* Determine if this is the last link while the inode is locked */ - link_zero = (ip->i_d.di_nlink == 0); - xfs_bmap_init(&free_list, &first_block); error = xfs_dir_removename(tp, dp, name, ip->i_ino, &first_block, &free_list, resblks); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 3fad07136c5d..08e38b96e03e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -263,7 +263,6 @@ xfs_iomap_eof_want_preallocate( { xfs_fileoff_t start_fsb; xfs_filblks_t count_fsb; - xfs_fsblock_t firstblock; int n, error, imaps; int found_delalloc = 0; @@ -288,7 +287,6 @@ xfs_iomap_eof_want_preallocate( count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); while (count_fsb > 0) { imaps = nimaps; - firstblock = NULLFSBLOCK; error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps, 0); if (error) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f1deb961a296..61abca96ca73 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -349,7 +349,6 @@ xfs_bulkstat( int *done) /* 1 if there are more stats to get */ { xfs_buf_t *agbp; /* agi header buffer */ - xfs_agi_t *agi; /* agi header data */ xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ @@ -408,7 +407,6 @@ xfs_bulkstat( error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); if (error) break; - agi = XFS_BUF_TO_AGI(agbp); /* * Allocate and initialize a btree cursor for ialloc btree. */ From b29c70f59870dad0945b0e0b3fe3758ad528e268 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 4 Dec 2014 09:43:17 +1100 Subject: [PATCH 10/10] xfs: split metadata and log buffer completion to separate workqueues XFS traditionally sends all buffer I/O completion work to a single workqueue. This includes metadata buffer completion and log buffer completion. The log buffer completion requires a high priority queue to prevent stalls due to log forces getting stuck behind other queued work. Rather than continue to prioritize all buffer I/O completion due to the needs of log completion, split log buffer completion off to m_log_workqueue and move the high priority flag from m_buf_workqueue to m_log_workqueue. Add a b_ioend_wq wq pointer to xfs_buf to allow completion workqueue customization on a per-buffer basis. Initialize b_ioend_wq to m_buf_workqueue by default in the generic buffer I/O submission path. Finally, override the default wq with the high priority m_log_workqueue in the log buffer I/O submission path. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.c | 13 ++++++++++--- fs/xfs/xfs_buf.h | 3 ++- fs/xfs/xfs_log.c | 4 ++++ fs/xfs/xfs_super.c | 5 ++--- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d083889535a2..945bea924e48 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1041,7 +1041,7 @@ xfs_buf_ioend_work( struct work_struct *work) { struct xfs_buf *bp = - container_of(work, xfs_buf_t, b_iodone_work); + container_of(work, xfs_buf_t, b_ioend_work); xfs_buf_ioend(bp); } @@ -1050,8 +1050,8 @@ void xfs_buf_ioend_async( struct xfs_buf *bp) { - INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work); - queue_work(bp->b_target->bt_mount->m_buf_workqueue, &bp->b_iodone_work); + INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); + queue_work(bp->b_ioend_wq, &bp->b_ioend_work); } void @@ -1220,6 +1220,13 @@ _xfs_buf_ioapply( */ bp->b_error = 0; + /* + * Initialize the I/O completion workqueue if we haven't yet or the + * submitter has not opted to specify a custom one. + */ + if (!bp->b_ioend_wq) + bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue; + if (bp->b_flags & XBF_WRITE) { if (bp->b_flags & XBF_SYNCIO) rw = WRITE_SYNC; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 82002c00af90..75ff5d5a7d2e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -164,7 +164,8 @@ typedef struct xfs_buf { struct xfs_perag *b_pag; /* contains rbtree root */ xfs_buftarg_t *b_target; /* buffer target (device) */ void *b_addr; /* virtual address of buffer */ - struct work_struct b_iodone_work; + struct work_struct b_ioend_work; + struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ xfs_buf_iodone_t b_iodone; /* I/O completion function */ struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e810e9df91b7..d508a654e198 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1808,6 +1808,8 @@ xlog_sync( XFS_BUF_ZEROFLAGS(bp); XFS_BUF_ASYNC(bp); bp->b_flags |= XBF_SYNCIO; + /* use high priority completion wq */ + bp->b_ioend_wq = log->l_mp->m_log_workqueue; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { bp->b_flags |= XBF_FUA; @@ -1856,6 +1858,8 @@ xlog_sync( bp->b_flags |= XBF_SYNCIO; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) bp->b_flags |= XBF_FUA; + /* use high priority completion wq */ + bp->b_ioend_wq = log->l_mp->m_log_workqueue; ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 45fa5b570eb5..b5aacfc462af 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -842,8 +842,7 @@ xfs_init_mount_workqueues( struct xfs_mount *mp) { mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", - WQ_MEM_RECLAIM|WQ_HIGHPRI|WQ_FREEZABLE, 1, - mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname); if (!mp->m_buf_workqueue) goto out; @@ -868,7 +867,7 @@ xfs_init_mount_workqueues( goto out_destroy_cil; mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", - WQ_FREEZABLE, 0, mp->m_fsname); + WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); if (!mp->m_log_workqueue) goto out_destroy_reclaim;