mirror of https://gitee.com/openkylin/linux.git
ext4: journal credits calculation cleanup and fix for non-extent writepage
When considering how many journal credits are needed for modifying a chunk of data, we need to account for the super block, inode block, quota blocks and xattr block, indirect/index blocks, and also the group bitmap and group descriptor blocks for new allocations (including data and indirect/index blocks). Many places in ext4 do this calculation on their own and often miss one or two metadata blocks; they also often assume a single block allocation and do not consider the case of allocating multiple chunks. This patch tries to clean up the current journal credit code and provides some common helper functions to calculate the journal credits, to be used for writepage, writepages, DIO, fallocate, migration and defrag, for both non-extent and extent files. This patch modifies the writepage/write_begin credit calculation for non-extent files to use the new helper function. It also fixes the problem that writepage on non-extent files did not consider the case blocksize < pagesize, and thus could possibly need multiple block allocations in a single transaction. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
c001077f40
commit
a02908f19c
|
@ -1072,6 +1072,7 @@ extern void ext4_set_inode_flags(struct inode *);
|
|||
extern void ext4_get_inode_flags(struct ext4_inode_info *);
|
||||
extern void ext4_set_aops(struct inode *inode);
|
||||
extern int ext4_writepage_trans_blocks(struct inode *);
|
||||
/* chunk: non-zero when the nrblocks are physically contiguous on disk */
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
|
||||
extern int ext4_block_truncate_page(handle_t *handle,
|
||||
struct address_space *mapping, loff_t from);
|
||||
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
|
||||
|
@ -1227,6 +1228,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
|
|||
/* extents.c */
|
||||
extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
|
||||
extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
|
||||
extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
|
||||
int chunk);
|
||||
extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_lblk_t iblock,
|
||||
unsigned long max_blocks, struct buffer_head *bh_result,
|
||||
|
|
|
@ -51,6 +51,14 @@
|
|||
EXT4_XATTR_TRANS_BLOCKS - 2 + \
|
||||
2*EXT4_QUOTA_TRANS_BLOCKS(sb))
|
||||
|
||||
/*
|
||||
* Define the number of metadata blocks we need to account to modify data.
|
||||
*
|
||||
* This includes the super block, inode block, quota blocks and xattr blocks
|
||||
*/
|
||||
#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
|
||||
2*EXT4_QUOTA_TRANS_BLOCKS(sb))
|
||||
|
||||
/* Delete operations potentially hit one directory's namespace plus an
|
||||
* entire inode, plus arbitrary amounts of bitmap/indirection data. Be
|
||||
* generous. We can grow the delete transaction later if necessary. */
|
||||
|
|
145
fs/ext4/inode.c
145
fs/ext4/inode.c
|
@ -4354,56 +4354,119 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* How many blocks doth make a writepage()?
|
||||
*
|
||||
* With N blocks per page, it may be:
|
||||
* N data blocks
|
||||
* 2 indirect block
|
||||
* 2 dindirect
|
||||
* 1 tindirect
|
||||
* N+5 bitmap blocks (from the above)
|
||||
* N+5 group descriptor summary blocks
|
||||
* 1 inode block
|
||||
* 1 superblock.
|
||||
* 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
|
||||
*
|
||||
* 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS
|
||||
*
|
||||
* With ordered or writeback data it's the same, less the N data blocks.
|
||||
*
|
||||
* If the inode's direct blocks can hold an integral number of pages then a
|
||||
* page cannot straddle two indirect blocks, and we can only touch one indirect
|
||||
* and dindirect block, and the "5" above becomes "3".
|
||||
*
|
||||
* This still overestimates under most circumstances. If we were to pass the
|
||||
* start and end offsets in here as well we could do block_to_path() on each
|
||||
* block and work out the exact number of indirects which are touched. Pah.
|
||||
*/
|
||||
|
||||
int ext4_writepage_trans_blocks(struct inode *inode)
|
||||
static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
|
||||
int chunk)
|
||||
{
|
||||
int bpp = ext4_journal_blocks_per_page(inode);
|
||||
int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
|
||||
int ret;
|
||||
int indirects;
|
||||
|
||||
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
|
||||
return ext4_ext_writepage_trans_blocks(inode, bpp);
|
||||
/* if nrblocks are contiguous */
|
||||
if (chunk) {
|
||||
/*
|
||||
* With N contiguous data blocks, it need at most
|
||||
* N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
|
||||
* 2 dindirect blocks
|
||||
* 1 tindirect block
|
||||
*/
|
||||
indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
|
||||
return indirects + 3;
|
||||
}
|
||||
/*
|
||||
* if nrblocks are not contiguous, worst case, each block touches
|
||||
* an indirect block, and each indirect block touches a double indirect
|
||||
* block, plus a triple indirect block
|
||||
*/
|
||||
indirects = nrblocks * 2 + 1;
|
||||
return indirects;
|
||||
}
|
||||
|
||||
if (ext4_should_journal_data(inode))
|
||||
ret = 3 * (bpp + indirects) + 2;
|
||||
static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
|
||||
{
|
||||
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
|
||||
return ext4_indirect_trans_blocks(inode, nrblocks, 0);
|
||||
return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
|
||||
}
|
||||
/*
|
||||
* Account for index blocks, block group bitmaps and block group
|
||||
* descriptor blocks needed to modify data blocks and index blocks.
|
||||
* Worst case, the index blocks are spread over different block groups.
|
||||
*
|
||||
* If the data blocks are discontiguous, they may be spread over
|
||||
* different block groups too. If they are contiguous, with flexbg,
|
||||
* they could still cross a block group boundary.
|
||||
*
|
||||
* Also account for superblock, inode, quota and xattr blocks
|
||||
*/
|
||||
int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
|
||||
{
|
||||
int groups, gdpblocks;
|
||||
int idxblocks;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* How many index blocks need to touch to modify nrblocks?
|
||||
* The "Chunk" flag indicating whether the nrblocks is
|
||||
* physically contiguous on disk
|
||||
*
|
||||
* Direct IO and fallocate call get_block to allocate
|
||||
* one single extent at a time, so they can set the "Chunk" flag
|
||||
*/
|
||||
idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
|
||||
|
||||
ret = idxblocks;
|
||||
|
||||
/*
|
||||
* Now let's see how many group bitmaps and group descriptors need
|
||||
* to account
|
||||
*/
|
||||
groups = idxblocks;
|
||||
if (chunk)
|
||||
groups += 1;
|
||||
else
|
||||
ret = 2 * (bpp + indirects) + 2;
|
||||
groups += nrblocks;
|
||||
|
||||
#ifdef CONFIG_QUOTA
|
||||
/* We know that structure was already allocated during DQUOT_INIT so
|
||||
* we will be updating only the data blocks + inodes */
|
||||
ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
|
||||
#endif
|
||||
gdpblocks = groups;
|
||||
if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
|
||||
groups = EXT4_SB(inode->i_sb)->s_groups_count;
|
||||
if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
|
||||
gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
|
||||
|
||||
/* bitmaps and block group descriptor blocks */
|
||||
ret += groups + gdpblocks;
|
||||
|
||||
/* Blocks for super block, inode, quota and xattr blocks */
|
||||
ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Calculate the total number of journal credits to reserve so that
 * modifying a single page fits into a single transaction.
 *
 * This may be called via ext4_write_begin(), or later from
 * ext4_da_writepages() in the delayed allocation case.
 *
 * In both cases multiple chunks of blocks may be allocated, so the
 * worst case is assumed: one new block per extent.
 *
 * Direct IO and fallocate reserve credits for a single extent
 * allocation, so they can use EXT4_DATA_TRANS_BLOCKS to get the
 * credits needed to log a single chunk of allocation.
 */
int ext4_writepage_trans_blocks(struct inode *inode)
{
	int blocks_per_page = ext4_journal_blocks_per_page(inode);
	/* chunk == 0: the blocks of the page are not assumed contiguous */
	int credits = ext4_meta_trans_blocks(inode, blocks_per_page, 0);

	/* In data journalling mode the data blocks are counted as well */
	if (ext4_should_journal_data(inode))
		credits += blocks_per_page;

	return credits;
}
|
||||
/*
|
||||
* The caller must have previously called ext4_reserve_inode_write().
|
||||
* Give this, we know that the caller already has write access to iloc->bh.
|
||||
|
|
Loading…
Reference in New Issue