Fixes for 5.19-rc5:

- Fix statfs blocking on background inode gc workers
- Fix some broken inode lock assertion code
- Fix xattr leaf buffer leaks when cancelling a deferred xattr update
  operation
- Clean up xattr recovery to make it easier to understand.
- Fix xattr leaf block verifiers tripping over empty blocks.
- Remove complicated and error prone xattr leaf block bholding mess.
- Fix a bug where an rt extent crossing EOF was treated as "posteof"
  blocks and cleaned unnecessarily.
- Fix a UAF when log shutdown races with unmount.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEUzaAxoMeQq6m2jMV+H93GTRKtOsFAmK/kVMACgkQ+H93GTRK
 tOs0tQ/+PYRhEDKrgocxZGJFNvnxqPRdEDu9k5XCnO2Y/DZRAF52F0JZaPtuiFH4
 12e9vzYYRNrE9KifzPWo4j2L067kFszt4XcAjytJuf5f6k/duX7XbsdMb17Qxd28
 mZDtBBSQCc9fcQo21u5SdZlPaD1SC1843jB4Oe7Sbo3AFvVAMwuBUgnp2TSDA8V0
 0q25PUD0ZvWP3UTQS4M4fW4WhFa5wF+GnLR1DZjryFIzuUp9JwdCQZHIFnp6cHq9
 TZMDJ4WhD9igMSzicRfgPoC8z/D3Mm0cFmRoURbG3GLzAeJ+e7PJ43rvlwq6Ajcv
 v5DhyQvFkiVjKLsrtJyvvUGSpkLL/touNG8MUE9I0heiiwb0QbP108aHWU8AS1Dr
 q7XHIxPaOhvlzVZN1uTuZE4N51/0NWITGKBwF0XU1b5D3wLyvOY6fbI7KLfkX2Sa
 4zHKn4QpHUIE9fs5Na3H6L+ndlJclo2DJA6lF26pLgmrT7NLmJG+r97XagBsp/pr
 X8qOvVMg1XJA37Vy1bTN5cfEYzTTksJk/fQ3AvSKHDCeP5u87kiZ6hqNnW6dD0YF
 D8VTX29rVQr5HavbcGCmAyBZpk4CfclCsWCQrZu9MCnQSW37HnObXPJkIWvzt8Mn
 j6emhPcYHy5TwSChxdpzl733ZX0KdkdOAgkWgqtod2E/7fe+g7Q=
 =8QeL
 -----END PGP SIGNATURE-----

Merge tag 'xfs-5.19-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "This fixes some stalling problems and corrects the last of the
  problems (I hope) observed during testing of the new atomic xattr
  update feature.

   - Fix statfs blocking on background inode gc workers

   - Fix some broken inode lock assertion code

   - Fix xattr leaf buffer leaks when cancelling a deferred xattr update
     operation

   - Clean up xattr recovery to make it easier to understand.

   - Fix xattr leaf block verifiers tripping over empty blocks.

   - Remove complicated and error prone xattr leaf block bholding mess.

   - Fix a bug where an rt extent crossing EOF was treated as "posteof"
     blocks and cleaned unnecessarily.

   - Fix a UAF when log shutdown races with unmount"

* tag 'xfs-5.19-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: prevent a UAF when log IO errors race with unmount
  xfs: dont treat rt extents beyond EOF as eofblocks to be cleared
  xfs: don't hold xattr leaf buffers across transaction rolls
  xfs: empty xattr leaf header blocks are not corruption
  xfs: clean up the end of xfs_attri_item_recover
  xfs: always free xattri_leaf_bp when cancelling a deferred op
  xfs: use invalidate_lock to check the state of mmap_lock
  xfs: factor out the common lock flags assert
  xfs: introduce xfs_inodegc_push()
  xfs: bound maximum wait time for inodegc work
This commit is contained in:
Linus Torvalds 2022-07-03 09:42:17 -07:00
commit 20855e4cb3
14 changed files with 130 additions and 131 deletions

View File

@ -50,7 +50,7 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp); STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp); STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args);
/* /*
* Internal routines when attribute list is more than one block. * Internal routines when attribute list is more than one block.
@ -393,16 +393,10 @@ xfs_attr_sf_addname(
* It won't fit in the shortform, transform to a leaf block. GROT: * It won't fit in the shortform, transform to a leaf block. GROT:
* another possible req'mt for a double-split btree op. * another possible req'mt for a double-split btree op.
*/ */
error = xfs_attr_shortform_to_leaf(args, &attr->xattri_leaf_bp); error = xfs_attr_shortform_to_leaf(args);
if (error) if (error)
return error; return error;
/*
* Prevent the leaf buffer from being unlocked so that a concurrent AIL
* push cannot grab the half-baked leaf buffer and run into problems
* with the write verifier.
*/
xfs_trans_bhold(args->trans, attr->xattri_leaf_bp);
attr->xattri_dela_state = XFS_DAS_LEAF_ADD; attr->xattri_dela_state = XFS_DAS_LEAF_ADD;
out: out:
trace_xfs_attr_sf_addname_return(attr->xattri_dela_state, args->dp); trace_xfs_attr_sf_addname_return(attr->xattri_dela_state, args->dp);
@ -447,11 +441,9 @@ xfs_attr_leaf_addname(
/* /*
* Use the leaf buffer we may already hold locked as a result of * Use the leaf buffer we may already hold locked as a result of
* a sf-to-leaf conversion. The held buffer is no longer valid * a sf-to-leaf conversion.
* after this call, regardless of the result.
*/ */
error = xfs_attr_leaf_try_add(args, attr->xattri_leaf_bp); error = xfs_attr_leaf_try_add(args);
attr->xattri_leaf_bp = NULL;
if (error == -ENOSPC) { if (error == -ENOSPC) {
error = xfs_attr3_leaf_to_node(args); error = xfs_attr3_leaf_to_node(args);
@ -497,8 +489,6 @@ xfs_attr_node_addname(
struct xfs_da_args *args = attr->xattri_da_args; struct xfs_da_args *args = attr->xattri_da_args;
int error; int error;
ASSERT(!attr->xattri_leaf_bp);
error = xfs_attr_node_addname_find_attr(attr); error = xfs_attr_node_addname_find_attr(attr);
if (error) if (error)
return error; return error;
@ -1215,24 +1205,14 @@ xfs_attr_restore_rmt_blk(
*/ */
STATIC int STATIC int
xfs_attr_leaf_try_add( xfs_attr_leaf_try_add(
struct xfs_da_args *args, struct xfs_da_args *args)
struct xfs_buf *bp)
{ {
struct xfs_buf *bp;
int error; int error;
/* error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
* If the caller provided a buffer to us, it is locked and held in if (error)
* the transaction because it just did a shortform to leaf conversion. return error;
* Hence we don't need to read it again. Otherwise read in the leaf
* buffer.
*/
if (bp) {
xfs_trans_bhold_release(args->trans, bp);
} else {
error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
if (error)
return error;
}
/* /*
* Look up the xattr name to set the insertion point for the new xattr. * Look up the xattr name to set the insertion point for the new xattr.

View File

@ -515,11 +515,6 @@ struct xfs_attr_intent {
*/ */
struct xfs_attri_log_nameval *xattri_nameval; struct xfs_attri_log_nameval *xattri_nameval;
/*
* Used by xfs_attr_set to hold a leaf buffer across a transaction roll
*/
struct xfs_buf *xattri_leaf_bp;
/* Used to keep track of current state of delayed operation */ /* Used to keep track of current state of delayed operation */
enum xfs_delattr_state xattri_dela_state; enum xfs_delattr_state xattri_dela_state;

View File

@ -289,6 +289,23 @@ xfs_attr3_leaf_verify_entry(
return NULL; return NULL;
} }
/*
* Validate an attribute leaf block.
*
* Empty leaf blocks can occur under the following circumstances:
*
* 1. setxattr adds a new extended attribute to a file;
* 2. The file has zero existing attributes;
* 3. The attribute is too large to fit in the attribute fork;
* 4. The attribute is small enough to fit in a leaf block;
* 5. A log flush occurs after committing the transaction that creates
* the (empty) leaf block; and
* 6. The filesystem goes down after the log flush but before the new
* attribute can be committed to the leaf block.
*
* Hence we need to ensure that we don't fail the validation purely
* because the leaf is empty.
*/
static xfs_failaddr_t static xfs_failaddr_t
xfs_attr3_leaf_verify( xfs_attr3_leaf_verify(
struct xfs_buf *bp) struct xfs_buf *bp)
@ -310,15 +327,6 @@ xfs_attr3_leaf_verify(
if (fa) if (fa)
return fa; return fa;
/*
* Empty leaf blocks should never occur; they imply the existence of a
* software bug that needs fixing. xfs_repair also flags them as a
* corruption that needs fixing, so we should never let these go to
* disk.
*/
if (ichdr.count == 0)
return __this_address;
/* /*
* firstused is the block offset of the first name info structure. * firstused is the block offset of the first name info structure.
* Make sure it doesn't go off the block or crash into the header. * Make sure it doesn't go off the block or crash into the header.
@ -922,14 +930,10 @@ xfs_attr_shortform_getvalue(
return -ENOATTR; return -ENOATTR;
} }
/* /* Convert from using the shortform to the leaf format. */
* Convert from using the shortform to the leaf. On success, return the
* buffer so that we can keep it locked until we're totally done with it.
*/
int int
xfs_attr_shortform_to_leaf( xfs_attr_shortform_to_leaf(
struct xfs_da_args *args, struct xfs_da_args *args)
struct xfs_buf **leaf_bp)
{ {
struct xfs_inode *dp; struct xfs_inode *dp;
struct xfs_attr_shortform *sf; struct xfs_attr_shortform *sf;
@ -991,7 +995,6 @@ xfs_attr_shortform_to_leaf(
sfe = xfs_attr_sf_nextentry(sfe); sfe = xfs_attr_sf_nextentry(sfe);
} }
error = 0; error = 0;
*leaf_bp = bp;
out: out:
kmem_free(tmpbuffer); kmem_free(tmpbuffer);
return error; return error;

View File

@ -49,8 +49,7 @@ void xfs_attr_shortform_create(struct xfs_da_args *args);
void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_lookup(struct xfs_da_args *args);
int xfs_attr_shortform_getvalue(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
struct xfs_buf **leaf_bp);
int xfs_attr_sf_removename(struct xfs_da_args *args); int xfs_attr_sf_removename(struct xfs_da_args *args);
int xfs_attr_sf_findname(struct xfs_da_args *args, int xfs_attr_sf_findname(struct xfs_da_args *args,
struct xfs_attr_sf_entry **sfep, struct xfs_attr_sf_entry **sfep,

View File

@ -576,7 +576,7 @@ xfs_attri_item_recover(
struct xfs_trans_res tres; struct xfs_trans_res tres;
struct xfs_attri_log_format *attrp; struct xfs_attri_log_format *attrp;
struct xfs_attri_log_nameval *nv = attrip->attri_nameval; struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
int error, ret = 0; int error;
int total; int total;
int local; int local;
struct xfs_attrd_log_item *done_item = NULL; struct xfs_attrd_log_item *done_item = NULL;
@ -655,29 +655,32 @@ xfs_attri_item_recover(
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);
ret = xfs_xattri_finish_update(attr, done_item); error = xfs_xattri_finish_update(attr, done_item);
if (ret == -EAGAIN) { if (error == -EAGAIN) {
/* There's more work to do, so add it to this transaction */ /*
* There's more work to do, so add the intent item to this
* transaction so that we can continue it later.
*/
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_ATTR, &attr->xattri_list); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_ATTR, &attr->xattri_list);
} else error = xfs_defer_ops_capture_and_commit(tp, capture_list);
error = ret; if (error)
goto out_unlock;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_irele(ip);
return 0;
}
if (error) { if (error) {
xfs_trans_cancel(tp); xfs_trans_cancel(tp);
goto out_unlock; goto out_unlock;
} }
error = xfs_defer_ops_capture_and_commit(tp, capture_list); error = xfs_defer_ops_capture_and_commit(tp, capture_list);
out_unlock: out_unlock:
if (attr->xattri_leaf_bp)
xfs_buf_relse(attr->xattri_leaf_bp);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_irele(ip); xfs_irele(ip);
out: out:
if (ret != -EAGAIN) xfs_attr_free_item(attr);
xfs_attr_free_item(attr);
return error; return error;
} }

View File

@ -686,6 +686,8 @@ xfs_can_free_eofblocks(
* forever. * forever.
*/ */
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1)
end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize);
last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (last_fsb <= end_fsb) if (last_fsb <= end_fsb)
return false; return false;

View File

@ -440,7 +440,7 @@ xfs_inodegc_queue_all(
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu); gc = per_cpu_ptr(mp->m_inodegc, cpu);
if (!llist_empty(&gc->list)) if (!llist_empty(&gc->list))
queue_work_on(cpu, mp->m_inodegc_wq, &gc->work); mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
} }
} }
@ -1841,8 +1841,8 @@ void
xfs_inodegc_worker( xfs_inodegc_worker(
struct work_struct *work) struct work_struct *work)
{ {
struct xfs_inodegc *gc = container_of(work, struct xfs_inodegc, struct xfs_inodegc *gc = container_of(to_delayed_work(work),
work); struct xfs_inodegc, work);
struct llist_node *node = llist_del_all(&gc->list); struct llist_node *node = llist_del_all(&gc->list);
struct xfs_inode *ip, *n; struct xfs_inode *ip, *n;
@ -1861,6 +1861,20 @@ xfs_inodegc_worker(
} }
} }
/*
* Expedite all pending inodegc work to run immediately. This does not wait for
* completion of the work.
*/
void
xfs_inodegc_push(
struct xfs_mount *mp)
{
if (!xfs_is_inodegc_enabled(mp))
return;
trace_xfs_inodegc_push(mp, __return_address);
xfs_inodegc_queue_all(mp);
}
/* /*
* Force all currently queued inode inactivation work to run immediately and * Force all currently queued inode inactivation work to run immediately and
* wait for the work to finish. * wait for the work to finish.
@ -1869,12 +1883,8 @@ void
xfs_inodegc_flush( xfs_inodegc_flush(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
if (!xfs_is_inodegc_enabled(mp)) xfs_inodegc_push(mp);
return;
trace_xfs_inodegc_flush(mp, __return_address); trace_xfs_inodegc_flush(mp, __return_address);
xfs_inodegc_queue_all(mp);
flush_workqueue(mp->m_inodegc_wq); flush_workqueue(mp->m_inodegc_wq);
} }
@ -2014,6 +2024,7 @@ xfs_inodegc_queue(
struct xfs_inodegc *gc; struct xfs_inodegc *gc;
int items; int items;
unsigned int shrinker_hits; unsigned int shrinker_hits;
unsigned long queue_delay = 1;
trace_xfs_inode_set_need_inactive(ip); trace_xfs_inode_set_need_inactive(ip);
spin_lock(&ip->i_flags_lock); spin_lock(&ip->i_flags_lock);
@ -2025,19 +2036,26 @@ xfs_inodegc_queue(
items = READ_ONCE(gc->items); items = READ_ONCE(gc->items);
WRITE_ONCE(gc->items, items + 1); WRITE_ONCE(gc->items, items + 1);
shrinker_hits = READ_ONCE(gc->shrinker_hits); shrinker_hits = READ_ONCE(gc->shrinker_hits);
put_cpu_ptr(gc);
if (!xfs_is_inodegc_enabled(mp)) /*
* We queue the work while holding the current CPU so that the work
* is scheduled to run on this CPU.
*/
if (!xfs_is_inodegc_enabled(mp)) {
put_cpu_ptr(gc);
return; return;
if (xfs_inodegc_want_queue_work(ip, items)) {
trace_xfs_inodegc_queue(mp, __return_address);
queue_work(mp->m_inodegc_wq, &gc->work);
} }
if (xfs_inodegc_want_queue_work(ip, items))
queue_delay = 0;
trace_xfs_inodegc_queue(mp, __return_address);
mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay);
put_cpu_ptr(gc);
if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) { if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
trace_xfs_inodegc_throttle(mp, __return_address); trace_xfs_inodegc_throttle(mp, __return_address);
flush_work(&gc->work); flush_delayed_work(&gc->work);
} }
} }
@ -2054,7 +2072,7 @@ xfs_inodegc_cpu_dead(
unsigned int count = 0; unsigned int count = 0;
dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu); dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu);
cancel_work_sync(&dead_gc->work); cancel_delayed_work_sync(&dead_gc->work);
if (llist_empty(&dead_gc->list)) if (llist_empty(&dead_gc->list))
return; return;
@ -2073,12 +2091,12 @@ xfs_inodegc_cpu_dead(
llist_add_batch(first, last, &gc->list); llist_add_batch(first, last, &gc->list);
count += READ_ONCE(gc->items); count += READ_ONCE(gc->items);
WRITE_ONCE(gc->items, count); WRITE_ONCE(gc->items, count);
put_cpu_ptr(gc);
if (xfs_is_inodegc_enabled(mp)) { if (xfs_is_inodegc_enabled(mp)) {
trace_xfs_inodegc_queue(mp, __return_address); trace_xfs_inodegc_queue(mp, __return_address);
queue_work(mp->m_inodegc_wq, &gc->work); mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0);
} }
put_cpu_ptr(gc);
} }
/* /*
@ -2173,7 +2191,7 @@ xfs_inodegc_shrinker_scan(
unsigned int h = READ_ONCE(gc->shrinker_hits); unsigned int h = READ_ONCE(gc->shrinker_hits);
WRITE_ONCE(gc->shrinker_hits, h + 1); WRITE_ONCE(gc->shrinker_hits, h + 1);
queue_work_on(cpu, mp->m_inodegc_wq, &gc->work); mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
no_items = false; no_items = false;
} }
} }

View File

@ -76,6 +76,7 @@ void xfs_blockgc_stop(struct xfs_mount *mp);
void xfs_blockgc_start(struct xfs_mount *mp); void xfs_blockgc_start(struct xfs_mount *mp);
void xfs_inodegc_worker(struct work_struct *work); void xfs_inodegc_worker(struct work_struct *work);
void xfs_inodegc_push(struct xfs_mount *mp);
void xfs_inodegc_flush(struct xfs_mount *mp); void xfs_inodegc_flush(struct xfs_mount *mp);
void xfs_inodegc_stop(struct xfs_mount *mp); void xfs_inodegc_stop(struct xfs_mount *mp);
void xfs_inodegc_start(struct xfs_mount *mp); void xfs_inodegc_start(struct xfs_mount *mp);

View File

@ -131,6 +131,26 @@ xfs_ilock_attr_map_shared(
return lock_mode; return lock_mode;
} }
/*
* You can't set both SHARED and EXCL for the same lock,
* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
* XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED, XFS_ILOCK_EXCL are valid values
* to set in lock_flags.
*/
static inline void
xfs_lock_flags_assert(
uint lock_flags)
{
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
ASSERT(lock_flags != 0);
}
/* /*
* In addition to i_rwsem in the VFS inode, the xfs inode contains 2 * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
* multi-reader locks: invalidate_lock and the i_lock. This routine allows * multi-reader locks: invalidate_lock and the i_lock. This routine allows
@ -168,18 +188,7 @@ xfs_ilock(
{ {
trace_xfs_ilock(ip, lock_flags, _RET_IP_); trace_xfs_ilock(ip, lock_flags, _RET_IP_);
/* xfs_lock_flags_assert(lock_flags);
* You can't set both SHARED and EXCL for the same lock,
* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
*/
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL) { if (lock_flags & XFS_IOLOCK_EXCL) {
down_write_nested(&VFS_I(ip)->i_rwsem, down_write_nested(&VFS_I(ip)->i_rwsem,
@ -222,18 +231,7 @@ xfs_ilock_nowait(
{ {
trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_); trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
/* xfs_lock_flags_assert(lock_flags);
* You can't set both SHARED and EXCL for the same lock,
* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
*/
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL) { if (lock_flags & XFS_IOLOCK_EXCL) {
if (!down_write_trylock(&VFS_I(ip)->i_rwsem)) if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
@ -291,19 +289,7 @@ xfs_iunlock(
xfs_inode_t *ip, xfs_inode_t *ip,
uint lock_flags) uint lock_flags)
{ {
/* xfs_lock_flags_assert(lock_flags);
* You can't set both SHARED and EXCL for the same lock,
* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
*/
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
ASSERT(lock_flags != 0);
if (lock_flags & XFS_IOLOCK_EXCL) if (lock_flags & XFS_IOLOCK_EXCL)
up_write(&VFS_I(ip)->i_rwsem); up_write(&VFS_I(ip)->i_rwsem);
@ -379,8 +365,8 @@ xfs_isilocked(
} }
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem, return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock,
(lock_flags & XFS_IOLOCK_SHARED)); (lock_flags & XFS_MMAPLOCK_SHARED));
} }
if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) { if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {

View File

@ -2092,8 +2092,6 @@ xlog_dealloc_log(
xlog_in_core_t *iclog, *next_iclog; xlog_in_core_t *iclog, *next_iclog;
int i; int i;
xlog_cil_destroy(log);
/* /*
* Cycle all the iclogbuf locks to make sure all log IO completion * Cycle all the iclogbuf locks to make sure all log IO completion
* is done before we tear down these buffers. * is done before we tear down these buffers.
@ -2105,6 +2103,13 @@ xlog_dealloc_log(
iclog = iclog->ic_next; iclog = iclog->ic_next;
} }
/*
* Destroy the CIL after waiting for iclog IO completion because an
* iclog EIO error will try to shut down the log, which accesses the
* CIL to wake up the waiters.
*/
xlog_cil_destroy(log);
iclog = log->l_iclog; iclog = log->l_iclog;
for (i = 0; i < log->l_iclog_bufs; i++) { for (i = 0; i < log->l_iclog_bufs; i++) {
next_iclog = iclog->ic_next; next_iclog = iclog->ic_next;

View File

@ -61,7 +61,7 @@ struct xfs_error_cfg {
*/ */
struct xfs_inodegc { struct xfs_inodegc {
struct llist_head list; struct llist_head list;
struct work_struct work; struct delayed_work work;
/* approximate count of inodes in the list */ /* approximate count of inodes in the list */
unsigned int items; unsigned int items;

View File

@ -454,9 +454,12 @@ xfs_qm_scall_getquota(
struct xfs_dquot *dqp; struct xfs_dquot *dqp;
int error; int error;
/* Flush inodegc work at the start of a quota reporting scan. */ /*
* Expedite pending inodegc work at the start of a quota reporting
* scan but don't block waiting for it to complete.
*/
if (id == 0) if (id == 0)
xfs_inodegc_flush(mp); xfs_inodegc_push(mp);
/* /*
* Try to get the dquot. We don't want it allocated on disk, so don't * Try to get the dquot. We don't want it allocated on disk, so don't
@ -498,7 +501,7 @@ xfs_qm_scall_getquota_next(
/* Flush inodegc work at the start of a quota reporting scan. */ /* Flush inodegc work at the start of a quota reporting scan. */
if (*id == 0) if (*id == 0)
xfs_inodegc_flush(mp); xfs_inodegc_push(mp);
error = xfs_qm_dqget_next(mp, *id, type, &dqp); error = xfs_qm_dqget_next(mp, *id, type, &dqp);
if (error) if (error)

View File

@ -797,8 +797,11 @@ xfs_fs_statfs(
xfs_extlen_t lsize; xfs_extlen_t lsize;
int64_t ffree; int64_t ffree;
/* Wait for whatever inactivations are in progress. */ /*
xfs_inodegc_flush(mp); * Expedite background inodegc but don't wait. We do not want to block
* here waiting hours for a billion extent file to be truncated.
*/
xfs_inodegc_push(mp);
statp->f_type = XFS_SUPER_MAGIC; statp->f_type = XFS_SUPER_MAGIC;
statp->f_namelen = MAXNAMELEN - 1; statp->f_namelen = MAXNAMELEN - 1;
@ -1074,7 +1077,7 @@ xfs_inodegc_init_percpu(
gc = per_cpu_ptr(mp->m_inodegc, cpu); gc = per_cpu_ptr(mp->m_inodegc, cpu);
init_llist_head(&gc->list); init_llist_head(&gc->list);
gc->items = 0; gc->items = 0;
INIT_WORK(&gc->work, xfs_inodegc_worker); INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
} }
return 0; return 0;
} }

View File

@ -240,6 +240,7 @@ DEFINE_EVENT(xfs_fs_class, name, \
TP_PROTO(struct xfs_mount *mp, void *caller_ip), \ TP_PROTO(struct xfs_mount *mp, void *caller_ip), \
TP_ARGS(mp, caller_ip)) TP_ARGS(mp, caller_ip))
DEFINE_FS_EVENT(xfs_inodegc_flush); DEFINE_FS_EVENT(xfs_inodegc_flush);
DEFINE_FS_EVENT(xfs_inodegc_push);
DEFINE_FS_EVENT(xfs_inodegc_start); DEFINE_FS_EVENT(xfs_inodegc_start);
DEFINE_FS_EVENT(xfs_inodegc_stop); DEFINE_FS_EVENT(xfs_inodegc_stop);
DEFINE_FS_EVENT(xfs_inodegc_queue); DEFINE_FS_EVENT(xfs_inodegc_queue);