mirror of https://gitee.com/openkylin/linux.git
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: only run xfs_error_test if error injection is active
  xfs: avoid moving stale inodes in the AIL
  xfs: delayed alloc blocks beyond EOF are valid after writeback
  xfs: push stale, pinned buffers on trylock failures
  xfs: fix failed write truncation handling
commit 8cb280c90f
@@ -934,7 +934,6 @@ xfs_aops_discard_page(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct buffer_head	*bh, *head;
 	loff_t			offset = page_offset(page);
-	ssize_t			len = 1 << inode->i_blkbits;
 
 	if (!xfs_is_delayed_page(page, IO_DELAY))
 		goto out_invalidate;
@@ -949,58 +948,14 @@ xfs_aops_discard_page(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	bh = head = page_buffers(page);
 	do {
-		int		done;
-		xfs_fileoff_t	offset_fsb;
-		xfs_bmbt_irec_t	imap;
-		int		nimaps = 1;
 		int		error;
-		xfs_fsblock_t	firstblock;
-		xfs_bmap_free_t	flist;
+		xfs_fileoff_t	start_fsb;
 
 		if (!buffer_delay(bh))
 			goto next_buffer;
 
-		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-
-		/*
-		 * Map the range first and check that it is a delalloc extent
-		 * before trying to unmap the range. Otherwise we will be
-		 * trying to remove a real extent (which requires a
-		 * transaction) or a hole, which is probably a bad idea...
-		 */
-		error = xfs_bmapi(NULL, ip, offset_fsb, 1,
-				XFS_BMAPI_ENTIRE, NULL, 0, &imap,
-				&nimaps, NULL);
-
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
-			"page discard failed delalloc mapping lookup.");
-			}
-			break;
-		}
-		if (!nimaps) {
-			/* nothing there */
-			goto next_buffer;
-		}
-		if (imap.br_startblock != DELAYSTARTBLOCK) {
-			/* been converted, ignore */
-			goto next_buffer;
-		}
-		WARN_ON(imap.br_blockcount == 0);
-
-		/*
-		 * Note: while we initialise the firstblock/flist pair, they
-		 * should never be used because blocks should never be
-		 * allocated or freed for a delalloc extent and hence we
-		 * don't need to cancel or finish them after the
-		 * xfs_bunmapi() call.
-		 */
-		xfs_bmap_init(&flist, &firstblock);
-		error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
-				&flist, &done);
-
-		ASSERT(!flist.xbf_count && !flist.xbf_first);
+		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
 		if (error) {
 			/* something screwed, just bail */
 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -1010,7 +965,7 @@ xfs_aops_discard_page(
 			break;
 		}
 next_buffer:
-		offset += len;
+		offset += 1 << inode->i_blkbits;
 
 	} while ((bh = bh->b_this_page) != head);
 
@@ -1505,11 +1460,42 @@ xfs_vm_write_failed(
 	struct inode	*inode = mapping->host;
 
 	if (to > inode->i_size) {
-		struct iattr ia = {
-			.ia_valid	= ATTR_SIZE | ATTR_FORCE,
-			.ia_size	= inode->i_size,
-		};
-		xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+		/*
+		 * punch out the delalloc blocks we have already allocated. We
+		 * don't call xfs_setattr() to do this as we may be in the
+		 * middle of a multi-iovec write and so the vfs inode->i_size
+		 * will not match the xfs ip->i_size and so it will zero too
+		 * much. Hence we just truncate the page cache to zero what is
+		 * necessary and punch the delalloc blocks directly.
+		 */
+		struct xfs_inode	*ip = XFS_I(inode);
+		xfs_fileoff_t		start_fsb;
+		xfs_fileoff_t		end_fsb;
+		int			error;
+
+		truncate_pagecache(inode, to, inode->i_size);
+
+		/*
+		 * Check if there are any blocks that are outside of i_size
+		 * that need to be trimmed back.
+		 */
+		start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+		end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+		if (end_fsb <= start_fsb)
+			return;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+						end_fsb - start_fsb);
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"xfs_vm_write_failed: unable to clean up ino %lld",
+						ip->i_ino);
+			}
+		}
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 }
 
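The block-range arithmetic above is easy to get wrong, so here is a minimal userspace model of it. It assumes XFS_B_TO_FSB() rounds a byte count up to whole filesystem blocks; the "+1" then conservatively skips one further block so a block that may still be partly inside i_size is never punched. The values, BLOCK_SHIFT, and b_to_fsb() are illustrative stand-ins, not the kernel API.

#include <stdio.h>

#define BLOCK_SHIFT 12				/* assume 4096-byte blocks */

static unsigned long long b_to_fsb(unsigned long long bytes)
{
	/* round up to whole blocks, as XFS_B_TO_FSB() does */
	return (bytes + (1ULL << BLOCK_SHIFT) - 1) >> BLOCK_SHIFT;
}

int main(void)
{
	unsigned long long i_size = 10000;	/* current on-disk EOF */
	unsigned long long to = 50000;		/* end of the failed write */

	/* +1 conservatively skips the block that may still hold data */
	unsigned long long start_fsb = b_to_fsb(i_size) + 1;
	unsigned long long end_fsb = b_to_fsb(to);

	if (end_fsb <= start_fsb)
		printf("nothing beyond EOF to punch\n");
	else
		printf("punch %llu blocks starting at fsb %llu\n",
		       end_fsb - start_fsb, start_fsb);
	return 0;
}

With these inputs the model punches blocks 4 through 12; if the failed write ended inside the block containing EOF, end_fsb <= start_fsb holds and nothing is punched.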
@@ -488,29 +488,16 @@ _xfs_buf_find(
 	spin_unlock(&pag->pag_buf_lock);
 	xfs_perag_put(pag);
 
-	/* Attempt to get the semaphore without sleeping,
-	 * if this does not work then we need to drop the
-	 * spinlock and do a hard attempt on the semaphore.
-	 */
-	if (down_trylock(&bp->b_sema)) {
+	if (xfs_buf_cond_lock(bp)) {
+		/* failed, so wait for the lock if requested. */
 		if (!(flags & XBF_TRYLOCK)) {
-			/* wait for buffer ownership */
 			xfs_buf_lock(bp);
 			XFS_STATS_INC(xb_get_locked_waited);
 		} else {
-			/* We asked for a trylock and failed, no need
-			 * to look at file offset and length here, we
-			 * know that this buffer at least overlaps our
-			 * buffer and is locked, therefore our buffer
-			 * either does not exist, or is this buffer.
-			 */
 			xfs_buf_rele(bp);
 			XFS_STATS_INC(xb_busy_locked);
 			return NULL;
 		}
-	} else {
-		/* trylock worked */
-		XB_SET_OWNER(bp);
 	}
 
 	if (bp->b_flags & XBF_STALE) {
@@ -876,10 +863,18 @@ xfs_buf_rele(
  */
 
 /*
- * Locks a buffer object, if it is not already locked.
- * Note that this in no way locks the underlying pages, so it is only
- * useful for synchronizing concurrent use of buffer objects, not for
- * synchronizing independent access to the underlying pages.
+ * Locks a buffer object, if it is not already locked. Note that this in
+ * no way locks the underlying pages, so it is only useful for
+ * synchronizing concurrent use of buffer objects, not for synchronizing
+ * independent access to the underlying pages.
+ *
+ * If we come across a stale, pinned, locked buffer, we know that we are
+ * being asked to lock a buffer that has been reallocated. Because it is
+ * pinned, we know that the log has not been pushed to disk and hence it
+ * will still be locked. Rather than continuing to have trylock attempts
+ * fail until someone else pushes the log, push it ourselves before
+ * returning. This means that the xfsaild will not get stuck trying
+ * to push on stale inode buffers.
  */
 int
 xfs_buf_cond_lock(
@@ -890,6 +885,8 @@ xfs_buf_cond_lock(
 	locked = down_trylock(&bp->b_sema) == 0;
 	if (locked)
 		XB_SET_OWNER(bp);
+	else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+		xfs_log_force(bp->b_target->bt_mount, 0);
 
 	trace_xfs_buf_cond_lock(bp, _RET_IP_);
 	return locked ? 0 : -EBUSY;
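The new behaviour is a trylock that, on failure, checks for the stale-and-pinned state and kicks the log instead of letting the caller retry a lock that cannot be released until the log moves. A minimal sketch of that pattern in portable C11, with stand-in types (an atomic_flag for b_sema, an atomic_int for b_pin_count) rather than the real xfs_buf API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define XBF_STALE	(1 << 0)

struct buf {
	atomic_flag	sema;		/* stand-in for bp->b_sema */
	atomic_int	pin_count;	/* stand-in for bp->b_pin_count */
	unsigned int	flags;
};

static void log_force(void)
{
	/* stand-in for xfs_log_force(mp, 0): start unpinning via the log */
	puts("pushing the log to unpin the stale buffer");
}

static int buf_cond_lock(struct buf *bp)
{
	bool locked = !atomic_flag_test_and_set(&bp->sema);

	/* a failed trylock on a pinned, stale buffer kicks the log
	 * rather than leaving the caller to retry forever */
	if (!locked && atomic_load(&bp->pin_count) && (bp->flags & XBF_STALE))
		log_force();
	return locked ? 0 : -1;		/* the kernel returns -EBUSY */
}

int main(void)
{
	struct buf bp = { .sema = ATOMIC_FLAG_INIT, .flags = XBF_STALE };

	(void)atomic_flag_test_and_set(&bp.sema);	/* another thread holds it */
	atomic_store(&bp.pin_count, 1);			/* and it is pinned */
	printf("cond_lock returned %d\n", buf_cond_lock(&bp));
	return 0;
}

The kernel version forces the log asynchronously; the point is only that the failed-trylock path is where the push is triggered, which is why xfsaild no longer gets stuck on stale inode buffers.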
@@ -5471,8 +5471,13 @@ xfs_getbmap(
 		if (error)
 			goto out_unlock_iolock;
 	}
-
-	ASSERT(ip->i_delayed_blks == 0);
+	/*
+	 * even after flushing the inode, there can still be delalloc
+	 * blocks on the inode beyond EOF due to speculative
+	 * preallocation. These are not removed until the release
+	 * function is called or the inode is inactivated. Hence we
+	 * cannot assert here that ip->i_delayed_blks == 0.
+	 */
 	}
 
 	lock = xfs_ilock_map_shared(ip);
@@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves(
 		*count += xfs_bmbt_disk_get_blockcount(frp);
 	}
 }
+
+/*
+ * dead simple method of punching delayed allocation blocks from a range in
+ * the inode. Walks a block at a time so will be slow, but is only executed in
+ * rare error cases so the overhead is not critical. This will always punch out
+ * both the start and end blocks, even if the ranges only partially overlap
+ * them, so it is up to the caller to ensure that partial blocks are not
+ * passed in.
+ */
+int
+xfs_bmap_punch_delalloc_range(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		start_fsb,
+	xfs_fileoff_t		length)
+{
+	xfs_fileoff_t		remaining = length;
+	int			error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+	do {
+		int		done;
+		xfs_bmbt_irec_t	imap;
+		int		nimaps = 1;
+		xfs_fsblock_t	firstblock;
+		xfs_bmap_free_t	flist;
+
+		/*
+		 * Map the range first and check that it is a delalloc extent
+		 * before trying to unmap the range. Otherwise we will be
+		 * trying to remove a real extent (which requires a
+		 * transaction) or a hole, which is probably a bad idea...
+		 */
+		error = xfs_bmapi(NULL, ip, start_fsb, 1,
+				XFS_BMAPI_ENTIRE, NULL, 0, &imap,
+				&nimaps, NULL);
+
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"Failed delalloc mapping lookup ino %lld fsb %lld.",
+						ip->i_ino, start_fsb);
+			}
+			break;
+		}
+		if (!nimaps) {
+			/* nothing there */
+			goto next_block;
+		}
+		if (imap.br_startblock != DELAYSTARTBLOCK) {
+			/* been converted, ignore */
+			goto next_block;
+		}
+		WARN_ON(imap.br_blockcount == 0);
+
+		/*
+		 * Note: while we initialise the firstblock/flist pair, they
+		 * should never be used because blocks should never be
+		 * allocated or freed for a delalloc extent and hence we
+		 * don't need to cancel or finish them after the
+		 * xfs_bunmapi() call.
+		 */
+		xfs_bmap_init(&flist, &firstblock);
+		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
+				&flist, &done);
+		if (error)
+			break;
+
+		ASSERT(!flist.xbf_count && !flist.xbf_first);
+next_block:
+		start_fsb++;
+		remaining--;
+	} while (remaining > 0);
+
+	return error;
+}
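The control flow of the new function, in miniature: map one block at a time, skip holes and extents that have already been converted to real allocations (removing those would need a transaction), and unmap only genuine delalloc blocks. A hedged userspace model follows, with map_block() standing in for the xfs_bmapi() single-block lookup and made-up example values:

#include <stdio.h>

enum ext_type { HOLE, DELALLOC, REAL };

/* stand-in for the xfs_bmapi() single-block lookup */
static enum ext_type map_block(unsigned long long fsb)
{
	if (fsb % 3 == 1)
		return DELALLOC;
	return fsb % 3 ? REAL : HOLE;
}

int main(void)
{
	unsigned long long start_fsb = 100;	/* arbitrary example range */
	unsigned long long remaining = 6;

	do {
		switch (map_block(start_fsb)) {
		case HOLE:
			/* nothing there: skip */
			break;
		case REAL:
			/* been converted: removing it would need a
			 * transaction, so leave it alone */
			break;
		case DELALLOC:
			/* safe to unmap without a transaction */
			printf("punch fsb %llu\n", start_fsb);
			break;
		}
		start_fsb++;
		remaining--;
	} while (remaining > 0);
	return 0;
}

Walking a block at a time is deliberately simple; as the comment above the function notes, this only runs on rare error paths, so throughput is not a concern.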
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks(
 	int			whichfork,
 	int			*count);
 
+int
+xfs_bmap_punch_delalloc_range(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		start_fsb,
+	xfs_fileoff_t		length);
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_BMAP_H__ */
@@ -377,6 +377,19 @@ xfs_swap_extents(
 	ip->i_d.di_format = tip->i_d.di_format;
 	tip->i_d.di_format = tmp;
 
+	/*
+	 * The extents in the source inode could still contain speculative
+	 * preallocation beyond EOF (e.g. the file is open but not modified
+	 * while defrag is in progress). In that case, we need to copy over the
+	 * number of delalloc blocks the data fork in the source inode is
+	 * tracking beyond EOF so that when the fork is truncated away when the
+	 * temporary inode is unlinked we don't underrun the i_delayed_blks
+	 * counter on that inode.
+	 */
+	ASSERT(tip->i_delayed_blks == 0);
+	tip->i_delayed_blks = ip->i_delayed_blks;
+	ip->i_delayed_blks = 0;
+
 	ilf_fields = XFS_ILOG_CORE;
 
 	switch(ip->i_d.di_format) {
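The three-line handoff is subtle enough to deserve a worked example. If the source inode is tracking, say, five delalloc blocks beyond EOF, those blocks move to the temporary inode along with the fork, so the counter must move too, or truncating the temporary inode on unlink would drive its counter negative. Plain ints stand in for the inode fields in this illustration:

#include <stdio.h>

int main(void)
{
	int ip_delayed_blks = 5;	/* source inode: 5 delalloc blocks > EOF */
	int tip_delayed_blks = 0;	/* temporary inode: none of its own */

	/* the fix: the counter moves with the fork contents */
	tip_delayed_blks = ip_delayed_blks;
	ip_delayed_blks = 0;

	/* unlinking the temporary inode truncates the fork it now holds,
	 * releasing those 5 delalloc blocks */
	tip_delayed_blks -= 5;

	printf("tip delayed blks after truncate: %d (would be -5 without the handoff)\n",
	       tip_delayed_blks);
	return 0;
}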
@@ -58,6 +58,7 @@ xfs_error_trap(int e)
 int	xfs_etest[XFS_NUM_INJECT_ERROR];
 int64_t	xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
 char *	xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
+int	xfs_error_test_active;
 
 int
 xfs_error_test(int error_tag, int *fsidp, char *expression,
@@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
 		len = strlen(mp->m_fsname);
 		xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
 		strcpy(xfs_etest_fsname[i], mp->m_fsname);
+		xfs_error_test_active++;
 		return 0;
 	}
 }
@@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
 			xfs_etest_fsid[i] = 0LL;
 			kmem_free(xfs_etest_fsname[i]);
 			xfs_etest_fsname[i] = NULL;
+			xfs_error_test_active--;
 		}
 	}
 
@@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level,
 #define XFS_RANDOM_BMAPIFORMAT			XFS_RANDOM_DEFAULT
 
 #ifdef DEBUG
+extern int xfs_error_test_active;
 extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
 
 #define	XFS_NUM_INJECT_ERROR	10
 #define XFS_TEST_ERROR(expr, mp, tag, rf)		\
-	((expr) || \
+	((expr) || (xfs_error_test_active && \
 	 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
-			 (rf)))
+			(rf))))
 
 extern int xfs_errortag_add    (int error_tag, xfs_mount_t *mp);
 extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
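The point of the extra xfs_error_test_active term is that && short-circuits: when no error tag is armed, XFS_TEST_ERROR() now costs one global load instead of a function call into xfs_error_test() on every check. A minimal model of the same guard, with simplified signatures rather than the kernel macro:

#include <stdio.h>

static int error_test_active;		/* incremented when a tag is armed */

static int error_test(int tag)
{
	/* stand-in for xfs_error_test(): table scan, fsid compare, etc. */
	printf("scanning injection table for tag %d\n", tag);
	return 0;
}

#define TEST_ERROR(expr, tag) \
	((expr) || (error_test_active && error_test(tag)))

int main(void)
{
	/* inactive: error_test() is never reached on this hot path */
	if (TEST_ERROR(0, 42))
		puts("error injected");

	error_test_active++;		/* as xfs_errortag_add() now does */
	if (TEST_ERROR(0, 42))
		puts("error injected");
	return 0;
}

xfs_errortag_add() and xfs_errortag_clearall() keep the counter in step with the number of armed tags, mirroring the increment and decrement in the hunks above.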
@@ -657,18 +657,37 @@ xfs_inode_item_unlock(
 }
 
 /*
- * This is called to find out where the oldest active copy of the
- * inode log item in the on disk log resides now that the last log
- * write of it completed at the given lsn.  Since we always re-log
- * all dirty data in an inode, the latest copy in the on disk log
- * is the only one that matters.  Therefore, simply return the
- * given lsn.
+ * This is called to find out where the oldest active copy of the inode log
+ * item in the on disk log resides now that the last log write of it completed
+ * at the given lsn. Since we always re-log all dirty data in an inode, the
+ * latest copy in the on disk log is the only one that matters. Therefore,
+ * simply return the given lsn.
+ *
+ * If the inode has been marked stale because the cluster is being freed, we
+ * don't want to (re-)insert this inode into the AIL. There is a race condition
+ * where the cluster buffer may be unpinned before the inode is inserted into
+ * the AIL during transaction committed processing. If the buffer is unpinned
+ * before the inode item has been committed and inserted, then it is possible
+ * for the buffer to be written and IO to complete before the inode is inserted
+ * into the AIL. In that case, we'd be inserting a clean, stale inode into the
+ * AIL which will never get removed. It will, however, get reclaimed, which
+ * triggers an assert in xfs_inode_free() complaining about freeing an inode
+ * still in the AIL.
+ *
+ * To avoid this, return a lower LSN than the one passed in so that the
+ * transaction committed code will not move the inode forward in the AIL but
+ * will still unpin it properly.
 */
 STATIC xfs_lsn_t
 xfs_inode_item_committed(
 	struct xfs_log_item	*lip,
 	xfs_lsn_t		lsn)
 {
+	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+	struct xfs_inode	*ip = iip->ili_inode;
+
+	if (xfs_iflags_test(ip, XFS_ISTALE))
+		return lsn - 1;
 	return lsn;
 }
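Why lsn - 1 works: per the comment above, the value returned from the committed handler is only used to move the item forward in the AIL, so returning anything below the commit LSN leaves a stale inode where it is (i.e. never inserted) while the unpin still happens. A toy model of that decision, under that simplified reading of the AIL rule:

#include <stdio.h>

typedef long long xfs_lsn_t;

/* stand-in for xfs_inode_item_committed() */
static xfs_lsn_t item_committed(int stale, xfs_lsn_t commit_lsn)
{
	return stale ? commit_lsn - 1 : commit_lsn;
}

int main(void)
{
	xfs_lsn_t commit_lsn = 1000;
	xfs_lsn_t where = item_committed(1 /* XFS_ISTALE set */, commit_lsn);

	/* simplified AIL rule: items are only ever moved forward */
	if (where < commit_lsn)
		puts("stale inode: not (re-)inserted into the AIL, only unpinned");
	else
		puts("clean inode: moved forward to the commit LSN");
	return 0;
}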