xfs: fix chown leaking delalloc quota blocks when fssetxattr fails

While refactoring the quota code to create a function to allocate inode
change transactions, I noticed that xfs_qm_vop_chown_reserve does more
than just make reservations: it also *modifies* the incore counts
directly to handle the owner id change for the delalloc blocks.

I then observed that the fssetxattr code continues validating input
arguments after making the quota reservation but before dirtying the
transaction.  If the routine decides to error out, it fails to undo the
accounting switch!  This leads to incorrect quota reservation and
failure down the line.

We can fix this by making the reservation function do only that -- for
the new dquot, it reserves ondisk and delalloc blocks to the
transaction, and the old dquot hangs on to its incore reservation for
now.  Once we actually switch the dquots, we can then update the incore
reservations because we've dirtied the transaction and it's too late to
turn back now.

No fixes tag because this has been broken since the start of git.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
This commit is contained in:
Darrick J. Wong 2021-01-29 19:06:10 -08:00
parent 560ab6c0d1
commit 1aecf3734a
1 changed file with 35 additions and 57 deletions

View File

@@ -1785,6 +1785,29 @@ xfs_qm_vop_chown(
xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
/*
* Back when we made quota reservations for the chown, we reserved the
* ondisk blocks + delalloc blocks with the new dquot. Now that we've
* switched the dquots, decrease the new dquot's block reservation
* (having already bumped up the real counter) so that we don't have
* any reservation to give back when we commit.
*/
xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
-ip->i_delayed_blks);
/*
* Give the incore reservation for delalloc blocks back to the old
* dquot. We don't normally handle delalloc quota reservations
* transactionally, so just lock the dquot and subtract from the
* reservation. Dirty the transaction because it's too late to turn
* back now.
*/
tp->t_flags |= XFS_TRANS_DIRTY;
xfs_dqlock(prevdq);
ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
prevdq->q_blk.reserved -= ip->i_delayed_blks;
xfs_dqunlock(prevdq);
/* /*
* Take an extra reference, because the inode is going to keep * Take an extra reference, because the inode is going to keep
* this dquot pointer even after the trans_commit. * this dquot pointer even after the trans_commit.
@@ -1807,84 +1830,39 @@ xfs_qm_vop_chown_reserve(
uint flags) uint flags)
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
uint64_t delblks;
unsigned int blkflags; unsigned int blkflags;
struct xfs_dquot *udq_unres = NULL;
struct xfs_dquot *gdq_unres = NULL;
struct xfs_dquot *pdq_unres = NULL;
struct xfs_dquot *udq_delblks = NULL; struct xfs_dquot *udq_delblks = NULL;
struct xfs_dquot *gdq_delblks = NULL; struct xfs_dquot *gdq_delblks = NULL;
struct xfs_dquot *pdq_delblks = NULL; struct xfs_dquot *pdq_delblks = NULL;
int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(XFS_IS_QUOTA_RUNNING(mp)); ASSERT(XFS_IS_QUOTA_RUNNING(mp));
delblks = ip->i_delayed_blks;
blkflags = XFS_IS_REALTIME_INODE(ip) ? blkflags = XFS_IS_REALTIME_INODE(ip) ?
XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
if (XFS_IS_UQUOTA_ON(mp) && udqp && if (XFS_IS_UQUOTA_ON(mp) && udqp &&
i_uid_read(VFS_I(ip)) != udqp->q_id) { i_uid_read(VFS_I(ip)) != udqp->q_id)
udq_delblks = udqp; udq_delblks = udqp;
/*
* If there are delayed allocation blocks, then we have to
* unreserve those from the old dquot, and add them to the
* new dquot.
*/
if (delblks) {
ASSERT(ip->i_udquot);
udq_unres = ip->i_udquot;
}
}
if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp && if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
i_gid_read(VFS_I(ip)) != gdqp->q_id) { i_gid_read(VFS_I(ip)) != gdqp->q_id)
gdq_delblks = gdqp; gdq_delblks = gdqp;
if (delblks) {
ASSERT(ip->i_gdquot);
gdq_unres = ip->i_gdquot;
}
}
if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp && if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp &&
ip->i_d.di_projid != pdqp->q_id) { ip->i_d.di_projid != pdqp->q_id)
pdq_delblks = pdqp; pdq_delblks = pdqp;
if (delblks) {
ASSERT(ip->i_pdquot);
pdq_unres = ip->i_pdquot;
}
}
error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
udq_delblks, gdq_delblks, pdq_delblks,
ip->i_d.di_nblocks, 1, flags | blkflags);
if (error)
return error;
/* /*
* Do the delayed blks reservations/unreservations now. Since, these * Reserve enough quota to handle blocks on disk and reserved for a
* are done without the help of a transaction, if a reservation fails * delayed allocation. We'll actually transfer the delalloc
* its previous reservations won't be automatically undone by trans * reservation between dquots at chown time, even though that part is
* code. So, we have to do it manually here. * only semi-transactional.
*/ */
if (delblks) { return xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, udq_delblks,
/* gdq_delblks, pdq_delblks,
* Do the reservations first. Unreservation can't fail. ip->i_d.di_nblocks + ip->i_delayed_blks,
*/ 1, blkflags | flags);
ASSERT(udq_delblks || gdq_delblks || pdq_delblks);
ASSERT(udq_unres || gdq_unres || pdq_unres);
error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
udq_delblks, gdq_delblks, pdq_delblks,
(xfs_qcnt_t)delblks, 0, flags | blkflags);
if (error)
return error;
xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
udq_unres, gdq_unres, pdq_unres,
-((xfs_qcnt_t)delblks), 0, blkflags);
}
return 0;
} }
int int