ext4: journal credits calculation cleanup and fix for non-extent writepage

When considering how many journal credits are needed for modifying a
chunk of data, we need to account for the super block, inode block,
quota blocks and xattr block, indirect/index blocks, and also the group
bitmap and group descriptor blocks for new allocations (including data
and indirect/index blocks). Many places in ext4 do the calculation on
their own and often miss one or two metadata blocks; they also often
assume a single block allocation and do not consider the case of
allocating multiple chunks.

This patch cleans up the current journal credit code and provides
some common helper functions to calculate the journal credits, to be used
for writepage, writepages, DIO, fallocate, migration, defrag, and for
both nonextent and extent files.

This patch modifies the writepage/write_begin credit calculation for
nonextent files to use the new helper function. It also fixes the
problem that writepage on nonextent files did not consider the case
blocksize < pagesize, and thus could possibly need multiple block
allocations in a single transaction.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
Mingming Cao 2008-08-19 22:16:07 -04:00 committed by Theodore Ts'o
parent c001077f40
commit a02908f19c
3 changed files with 115 additions and 41 deletions

View File

@ -1072,6 +1072,7 @@ extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern void ext4_set_aops(struct inode *inode); extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
extern int ext4_block_truncate_page(handle_t *handle, extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from); struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
@ -1227,6 +1228,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
/* extents.c */ /* extents.c */
extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
extern int ext4_ext_writepage_trans_blocks(struct inode *, int); extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
int chunk);
extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_lblk_t iblock,
unsigned long max_blocks, struct buffer_head *bh_result, unsigned long max_blocks, struct buffer_head *bh_result,

View File

@ -51,6 +51,14 @@
EXT4_XATTR_TRANS_BLOCKS - 2 + \ EXT4_XATTR_TRANS_BLOCKS - 2 + \
2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
/*
* Define the number of metadata blocks we need to account for to modify data.
*
* This includes the super block, inode block, quota blocks and xattr blocks.
*/
#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
2*EXT4_QUOTA_TRANS_BLOCKS(sb))
/* Delete operations potentially hit one directory's namespace plus an /* Delete operations potentially hit one directory's namespace plus an
* entire inode, plus arbitrary amounts of bitmap/indirection data. Be * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
* generous. We can grow the delete transaction later if necessary. */ * generous. We can grow the delete transaction later if necessary. */

View File

@ -4354,56 +4354,119 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0; return 0;
} }
/* static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
* How many blocks doth make a writepage()? int chunk)
*
* With N blocks per page, it may be:
* N data blocks
* 2 indirect block
* 2 dindirect
* 1 tindirect
* N+5 bitmap blocks (from the above)
* N+5 group descriptor summary blocks
* 1 inode block
* 1 superblock.
* 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
*
* 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS
*
* With ordered or writeback data it's the same, less the N data blocks.
*
* If the inode's direct blocks can hold an integral number of pages then a
* page cannot straddle two indirect blocks, and we can only touch one indirect
* and dindirect block, and the "5" above becomes "3".
*
* This still overestimates under most circumstances. If we were to pass the
* start and end offsets in here as well we could do block_to_path() on each
* block and work out the exact number of indirects which are touched. Pah.
*/
int ext4_writepage_trans_blocks(struct inode *inode)
{ {
int bpp = ext4_journal_blocks_per_page(inode); int indirects;
int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
int ret;
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) /* if nrblocks are contiguous */
return ext4_ext_writepage_trans_blocks(inode, bpp); if (chunk) {
/*
* With N contiguous data blocks, it need at most
* N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
* 2 dindirect blocks
* 1 tindirect block
*/
indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
return indirects + 3;
}
/*
* if nrblocks are not contiguous, worse case, each block touch
* a indirect block, and each indirect block touch a double indirect
* block, plus a triple indirect block
*/
indirects = nrblocks * 2 + 1;
return indirects;
}
if (ext4_should_journal_data(inode)) static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
ret = 3 * (bpp + indirects) + 2; {
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
return ext4_indirect_trans_blocks(inode, nrblocks, 0);
return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
}
/*
* Account for index blocks, block group bitmaps and block group
* descriptor blocks when modifying data blocks and index blocks.
* In the worst case, the index blocks are spread over different block groups.
*
* If data blocks are discontiguous, they may be spread over
* different block groups too. If they are contiguous, with flexbg,
* they could still cross a block group boundary.
*
* Also account for superblock, inode, quota and xattr blocks
*/
int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
int groups, gdpblocks;
int idxblocks;
int ret = 0;
/*
* How many index blocks need to touch to modify nrblocks?
* The "Chunk" flag indicating whether the nrblocks is
* physically contiguous on disk
*
* For Direct IO and fallocate, they calls get_block to allocate
* one single extent at a time, so they could set the "Chunk" flag
*/
idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
ret = idxblocks;
/*
* Now let's see how many group bitmaps and group descriptors need
* to account
*/
groups = idxblocks;
if (chunk)
groups += 1;
else else
ret = 2 * (bpp + indirects) + 2; groups += nrblocks;
#ifdef CONFIG_QUOTA gdpblocks = groups;
/* We know that structure was already allocated during DQUOT_INIT so if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
* we will be updating only the data blocks + inodes */ groups = EXT4_SB(inode->i_sb)->s_groups_count;
ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
#endif gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
/* bitmaps and block group descriptor blocks */
ret += groups + gdpblocks;
/* Blocks for super block, inode, quota and xattr blocks */
ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
return ret; return ret;
} }
/*
* Calculate the total number of credits to reserve to fit
* the modification of a single page into a single transaction.
*
* This could be called via ext4_write_begin() or later
* ext4_da_writepages() in the delayed allocation case.
*
* In both cases it's possible that we could be allocating multiple
* chunks of blocks. We need to consider the worst case, where there is
* one new block per extent.
*
* For Direct IO and fallocate, the journal credits reservation
* is based on one single extent allocation, so they could use
* EXT4_DATA_TRANS_BLOCKS to get the needed credits to log a single
* chunk of allocation needs.
*/
int ext4_writepage_trans_blocks(struct inode *inode)
{
int bpp = ext4_journal_blocks_per_page(inode);
int ret;
ret = ext4_meta_trans_blocks(inode, bpp, 0);
/* Account for data blocks for journalled mode */
if (ext4_should_journal_data(inode))
ret += bpp;
return ret;
}
/* /*
* The caller must have previously called ext4_reserve_inode_write(). * The caller must have previously called ext4_reserve_inode_write().
* Give this, we know that the caller already has write access to iloc->bh. * Give this, we know that the caller already has write access to iloc->bh.