mirror of https://gitee.com/openkylin/linux.git
ext4: Add multi block allocator for ext4
Signed-off-by: Alex Tomas <alex@clusterfs.com> Signed-off-by: Andreas Dilger <adilger@clusterfs.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Sandeen <sandeen@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
1988b51e47
commit
c9de560ded
|
@ -86,9 +86,11 @@ Alex is working on a new set of patches right now.
|
|||
When mounting an ext4 filesystem, the following options are accepted:
|
||||
(*) == default
|
||||
|
||||
extents ext4 will use extents to address file data. The
|
||||
extents (*) ext4 will use extents to address file data. The
|
||||
file system will no longer be mountable by ext3.
|
||||
|
||||
noextents ext4 will not use extents for newly created files
|
||||
|
||||
journal_checksum Enable checksumming of the journal transactions.
|
||||
This will allow the recovery code in e2fsck and the
|
||||
kernel to detect corruption in the kernel. It is a
|
||||
|
@ -206,6 +208,12 @@ nobh (a) cache disk block mapping information
|
|||
"nobh" option tries to avoid associating buffer
|
||||
heads (supported only for "writeback" mode).
|
||||
|
||||
mballoc (*) Use the multiple block allocator for block allocation
|
||||
nomballoc Disable the multiple block allocator for block allocation.
|
||||
stripe=n Number of filesystem blocks that mballoc will try
|
||||
to use for allocation size and alignment. For RAID5/6
|
||||
systems this should be the number of data
|
||||
disks * RAID chunk size in file system blocks.
|
||||
|
||||
Data Mode
|
||||
---------
|
||||
|
|
|
@ -857,6 +857,45 @@ CPUs.
|
|||
The "procs_blocked" line gives the number of processes currently blocked,
|
||||
waiting for I/O to complete.
|
||||
|
||||
1.9 Ext4 file system parameters
|
||||
------------------------------
|
||||
Each ext4 file system has one directory per partition under /proc/fs/ext4/
|
||||
# ls /proc/fs/ext4/hdc/
|
||||
group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req
|
||||
stats stream_req
|
||||
|
||||
mb_groups:
|
||||
This file gives the details of the multiblock allocator's buddy cache of free blocks
|
||||
|
||||
mb_history:
|
||||
Multiblock allocation history.
|
||||
|
||||
stats:
|
||||
This file indicates whether the multiblock allocator should start collecting
|
||||
statistics. The statistics are shown during unmount.
|
||||
|
||||
group_prealloc:
|
||||
The multiblock allocator normalizes the block allocation request to
|
||||
group_prealloc filesystem blocks if we don't have a stripe value set.
|
||||
The stripe value can be specified at mount time or during mke2fs.
|
||||
|
||||
max_to_scan:
|
||||
How long multiblock allocator can look for a best extent (in found extents)
|
||||
|
||||
min_to_scan:
|
||||
How long multiblock allocator must look for a best extent
|
||||
|
||||
order2_req:
|
||||
The multiblock allocator uses a 2^N search using buddies only for requests greater
|
||||
than or equal to order2_req. The request size is specified in file system
|
||||
blocks. A value of 2 means the buddy search is used only for requests greater than or equal
|
||||
to 4 blocks.
|
||||
|
||||
stream_req:
|
||||
Files smaller than stream_req are served by the stream allocator, whose
|
||||
purpose is to pack requests as close to each other as possible to
|
||||
produce smooth I/O traffic. A value of 16 indicates that files smaller than 16
|
||||
filesystem blocks will use group-based preallocation.
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
Summary
|
||||
|
|
|
@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
|
|||
|
||||
ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
|
||||
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
|
||||
ext4_jbd2.o migrate.o
|
||||
ext4_jbd2.o migrate.o mballoc.o
|
||||
|
||||
ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
|
||||
ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
|
||||
|
|
|
@ -577,6 +577,8 @@ void ext4_discard_reservation(struct inode *inode)
|
|||
struct ext4_reserve_window_node *rsv;
|
||||
spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
|
||||
|
||||
ext4_mb_discard_inode_preallocations(inode);
|
||||
|
||||
if (!block_i)
|
||||
return;
|
||||
|
||||
|
@ -785,19 +787,29 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
|
|||
* @inode: inode
|
||||
* @block: start physical block to free
|
||||
* @count: number of blocks to free
|
||||
* @metadata: Are these metadata blocks
|
||||
*/
|
||||
void ext4_free_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_fsblk_t block, unsigned long count)
|
||||
ext4_fsblk_t block, unsigned long count,
|
||||
int metadata)
|
||||
{
|
||||
struct super_block * sb;
|
||||
unsigned long dquot_freed_blocks;
|
||||
|
||||
/* this isn't the right place to decide whether block is metadata
|
||||
* inode.c/extents.c knows better, but for safety ... */
|
||||
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
|
||||
ext4_should_journal_data(inode))
|
||||
metadata = 1;
|
||||
|
||||
sb = inode->i_sb;
|
||||
if (!sb) {
|
||||
printk ("ext4_free_blocks: nonexistent device");
|
||||
return;
|
||||
}
|
||||
ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
|
||||
|
||||
if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
|
||||
ext4_free_blocks_sb(handle, sb, block, count,
|
||||
&dquot_freed_blocks);
|
||||
else
|
||||
ext4_mb_free_blocks(handle, inode, block, count,
|
||||
metadata, &dquot_freed_blocks);
|
||||
if (dquot_freed_blocks)
|
||||
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
|
||||
return;
|
||||
|
@ -1576,7 +1588,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
|
|||
}
|
||||
|
||||
/**
|
||||
* ext4_new_blocks() -- core block(s) allocation function
|
||||
* ext4_new_blocks_old() -- core block(s) allocation function
|
||||
* @handle: handle to this transaction
|
||||
* @inode: file inode
|
||||
* @goal: given target block(filesystem wide)
|
||||
|
@ -1589,7 +1601,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
|
|||
* any specific goal block.
|
||||
*
|
||||
*/
|
||||
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
|
||||
ext4_fsblk_t goal, unsigned long *count, int *errp)
|
||||
{
|
||||
struct buffer_head *bitmap_bh = NULL;
|
||||
|
@ -1849,13 +1861,46 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
|
|||
}
|
||||
|
||||
ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
|
||||
ext4_fsblk_t goal, int *errp)
|
||||
ext4_fsblk_t goal, int *errp)
|
||||
{
|
||||
unsigned long count = 1;
|
||||
struct ext4_allocation_request ar;
|
||||
ext4_fsblk_t ret;
|
||||
|
||||
return ext4_new_blocks(handle, inode, goal, &count, errp);
|
||||
if (!test_opt(inode->i_sb, MBALLOC)) {
|
||||
unsigned long count = 1;
|
||||
ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
memset(&ar, 0, sizeof(ar));
|
||||
ar.inode = inode;
|
||||
ar.goal = goal;
|
||||
ar.len = 1;
|
||||
ret = ext4_mb_new_blocks(handle, &ar, errp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * ext4_new_blocks() -- allocate blocks for @inode, choosing the allocator
 * by mount option.
 * @handle: handle to this transaction
 * @inode:  file inode
 * @goal:   given target block (passed through as a hint)
 * @count:  in: number of blocks wanted; out: the request's len field after
 *          the multi-block allocator ran (presumably the number of blocks
 *          actually obtained -- confirm against ext4_mb_new_blocks())
 * @errp:   error code, filled in by the allocator that is called
 *
 * Returns whatever the selected allocator returns.
 */
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
		ext4_fsblk_t goal, unsigned long *count, int *errp)
{
	struct ext4_allocation_request req;
	ext4_fsblk_t first;

	/* Without the MBALLOC mount option the old allocator does the work. */
	if (!test_opt(inode->i_sb, MBALLOC))
		return ext4_new_blocks_old(handle, inode, goal, count, errp);

	/* Only inode, goal and len are supplied; all other fields stay zero. */
	memset(&req, 0, sizeof(req));
	req.len = *count;
	req.goal = goal;
	req.inode = inode;

	first = ext4_mb_new_blocks(handle, &req, errp);

	/* Report the request's len back to the caller through @count. */
	*count = req.len;
	return first;
}
|
||||
|
||||
|
||||
/**
|
||||
* ext4_count_free_blocks() -- count filesystem free blocks
|
||||
* @sb: superblock
|
||||
|
|
|
@ -853,7 +853,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
|
|||
for (i = 0; i < depth; i++) {
|
||||
if (!ablocks[i])
|
||||
continue;
|
||||
ext4_free_blocks(handle, inode, ablocks[i], 1);
|
||||
ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
|
||||
}
|
||||
}
|
||||
kfree(ablocks);
|
||||
|
@ -1698,7 +1698,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
|
|||
ext_debug("index is empty, remove it, free block %llu\n", leaf);
|
||||
bh = sb_find_get_block(inode->i_sb, leaf);
|
||||
ext4_forget(handle, 1, inode, bh, leaf);
|
||||
ext4_free_blocks(handle, inode, leaf, 1);
|
||||
ext4_free_blocks(handle, inode, leaf, 1, 1);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -1759,8 +1759,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
|||
{
|
||||
struct buffer_head *bh;
|
||||
unsigned short ee_len = ext4_ext_get_actual_len(ex);
|
||||
int i;
|
||||
int i, metadata = 0;
|
||||
|
||||
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
|
||||
metadata = 1;
|
||||
#ifdef EXTENTS_STATS
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
|
@ -1789,7 +1791,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
|||
bh = sb_find_get_block(inode->i_sb, start + i);
|
||||
ext4_forget(handle, 0, inode, bh, start + i);
|
||||
}
|
||||
ext4_free_blocks(handle, inode, start, num);
|
||||
ext4_free_blocks(handle, inode, start, num, metadata);
|
||||
} else if (from == le32_to_cpu(ex->ee_block)
|
||||
&& to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
|
||||
printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
|
||||
|
@ -2287,6 +2289,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
|||
ext4_fsblk_t goal, newblock;
|
||||
int err = 0, depth, ret;
|
||||
unsigned long allocated = 0;
|
||||
struct ext4_allocation_request ar;
|
||||
|
||||
__clear_bit(BH_New, &bh_result->b_state);
|
||||
ext_debug("blocks %u/%lu requested for inode %u\n",
|
||||
|
@ -2397,8 +2400,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
|||
if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
|
||||
ext4_init_block_alloc_info(inode);
|
||||
|
||||
/* allocate new block */
|
||||
goal = ext4_ext_find_goal(inode, path, iblock);
|
||||
/* find neighbour allocated blocks */
|
||||
ar.lleft = iblock;
|
||||
err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
|
||||
if (err)
|
||||
goto out2;
|
||||
ar.lright = iblock;
|
||||
err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
|
||||
if (err)
|
||||
goto out2;
|
||||
|
||||
/*
|
||||
* See if request is beyond maximum number of blocks we can have in
|
||||
|
@ -2421,7 +2431,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
|||
allocated = le16_to_cpu(newex.ee_len);
|
||||
else
|
||||
allocated = max_blocks;
|
||||
newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
|
||||
|
||||
/* allocate new block */
|
||||
ar.inode = inode;
|
||||
ar.goal = ext4_ext_find_goal(inode, path, iblock);
|
||||
ar.logical = iblock;
|
||||
ar.len = allocated;
|
||||
if (S_ISREG(inode->i_mode))
|
||||
ar.flags = EXT4_MB_HINT_DATA;
|
||||
else
|
||||
/* disable in-core preallocation for non-regular files */
|
||||
ar.flags = 0;
|
||||
newblock = ext4_mb_new_blocks(handle, &ar, &err);
|
||||
if (!newblock)
|
||||
goto out2;
|
||||
ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
|
||||
|
@ -2429,14 +2450,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
|||
|
||||
/* try to insert new extent into found leaf and return */
|
||||
ext4_ext_store_pblock(&newex, newblock);
|
||||
newex.ee_len = cpu_to_le16(allocated);
|
||||
newex.ee_len = cpu_to_le16(ar.len);
|
||||
if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
|
||||
ext4_ext_mark_uninitialized(&newex);
|
||||
err = ext4_ext_insert_extent(handle, inode, path, &newex);
|
||||
if (err) {
|
||||
/* free data blocks we just allocated */
|
||||
/* not a good idea to call discard here directly,
|
||||
* but otherwise we'd need to call it every free() */
|
||||
ext4_mb_discard_inode_preallocations(inode);
|
||||
ext4_free_blocks(handle, inode, ext_pblock(&newex),
|
||||
le16_to_cpu(newex.ee_len));
|
||||
le16_to_cpu(newex.ee_len), 0);
|
||||
goto out2;
|
||||
}
|
||||
|
||||
|
@ -2445,6 +2469,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
|||
|
||||
/* previous routine could use block we allocated */
|
||||
newblock = ext_pblock(&newex);
|
||||
allocated = le16_to_cpu(newex.ee_len);
|
||||
outnew:
|
||||
__set_bit(BH_New, &bh_result->b_state);
|
||||
|
||||
|
@ -2496,6 +2521,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
|
|||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
ext4_ext_invalidate_cache(inode);
|
||||
|
||||
ext4_mb_discard_inode_preallocations(inode);
|
||||
|
||||
/*
|
||||
* TODO: optimization is possible here.
|
||||
* Probably we need not scan at all,
|
||||
|
|
|
@ -551,7 +551,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
|
|||
return ret;
|
||||
failed_out:
|
||||
for (i = 0; i <index; i++)
|
||||
ext4_free_blocks(handle, inode, new_blocks[i], 1);
|
||||
ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -650,9 +650,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
|
|||
ext4_journal_forget(handle, branch[i].bh);
|
||||
}
|
||||
for (i = 0; i <indirect_blks; i++)
|
||||
ext4_free_blocks(handle, inode, new_blocks[i], 1);
|
||||
ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
|
||||
|
||||
ext4_free_blocks(handle, inode, new_blocks[i], num);
|
||||
ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
@ -749,9 +749,10 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
|
|||
for (i = 1; i <= num; i++) {
|
||||
BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
|
||||
ext4_journal_forget(handle, where[i].bh);
|
||||
ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
|
||||
ext4_free_blocks(handle, inode,
|
||||
le32_to_cpu(where[i-1].key), 1, 0);
|
||||
}
|
||||
ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
|
||||
ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
@ -2052,7 +2053,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
|
|||
}
|
||||
}
|
||||
|
||||
ext4_free_blocks(handle, inode, block_to_free, count);
|
||||
ext4_free_blocks(handle, inode, block_to_free, count, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2225,7 +2226,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
|
|||
ext4_journal_test_restart(handle, inode);
|
||||
}
|
||||
|
||||
ext4_free_blocks(handle, inode, nr, 1);
|
||||
ext4_free_blocks(handle, inode, nr, 1, 1);
|
||||
|
||||
if (parent_bh) {
|
||||
/*
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -236,10 +236,10 @@ static int free_dind_blocks(handle_t *handle,
|
|||
for (i = 0; i < max_entries; i++) {
|
||||
if (tmp_idata[i])
|
||||
ext4_free_blocks(handle, inode,
|
||||
le32_to_cpu(tmp_idata[i]), 1);
|
||||
le32_to_cpu(tmp_idata[i]), 1, 1);
|
||||
}
|
||||
put_bh(bh);
|
||||
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
|
||||
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -267,7 +267,7 @@ static int free_tind_blocks(handle_t *handle,
|
|||
}
|
||||
}
|
||||
put_bh(bh);
|
||||
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
|
||||
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -278,7 +278,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode)
|
|||
|
||||
if (ei->i_data[EXT4_IND_BLOCK])
|
||||
ext4_free_blocks(handle, inode,
|
||||
le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1);
|
||||
le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1);
|
||||
|
||||
if (ei->i_data[EXT4_DIND_BLOCK]) {
|
||||
retval = free_dind_blocks(handle, inode,
|
||||
|
@ -365,7 +365,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
|
|||
}
|
||||
}
|
||||
put_bh(bh);
|
||||
ext4_free_blocks(handle, inode, block, 1);
|
||||
ext4_free_blocks(handle, inode, block, 1, 1);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
|
|
@ -503,6 +503,7 @@ static void ext4_put_super (struct super_block * sb)
|
|||
struct ext4_super_block *es = sbi->s_es;
|
||||
int i;
|
||||
|
||||
ext4_mb_release(sb);
|
||||
ext4_ext_release(sb);
|
||||
ext4_xattr_put_super(sb);
|
||||
jbd2_journal_destroy(sbi->s_journal);
|
||||
|
@ -569,6 +570,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
|
|||
ei->i_block_alloc_info = NULL;
|
||||
ei->vfs_inode.i_version = 1;
|
||||
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
|
||||
INIT_LIST_HEAD(&ei->i_prealloc_list);
|
||||
spin_lock_init(&ei->i_prealloc_lock);
|
||||
return &ei->vfs_inode;
|
||||
}
|
||||
|
||||
|
@ -881,6 +884,7 @@ enum {
|
|||
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
|
||||
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
|
||||
Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
|
||||
Opt_mballoc, Opt_nomballoc, Opt_stripe,
|
||||
};
|
||||
|
||||
static match_table_t tokens = {
|
||||
|
@ -935,6 +939,9 @@ static match_table_t tokens = {
|
|||
{Opt_extents, "extents"},
|
||||
{Opt_noextents, "noextents"},
|
||||
{Opt_i_version, "i_version"},
|
||||
{Opt_mballoc, "mballoc"},
|
||||
{Opt_nomballoc, "nomballoc"},
|
||||
{Opt_stripe, "stripe=%u"},
|
||||
{Opt_err, NULL},
|
||||
{Opt_resize, "resize"},
|
||||
};
|
||||
|
@ -1284,6 +1291,19 @@ static int parse_options (char *options, struct super_block *sb,
|
|||
set_opt(sbi->s_mount_opt, I_VERSION);
|
||||
sb->s_flags |= MS_I_VERSION;
|
||||
break;
|
||||
case Opt_mballoc:
|
||||
set_opt(sbi->s_mount_opt, MBALLOC);
|
||||
break;
|
||||
case Opt_nomballoc:
|
||||
clear_opt(sbi->s_mount_opt, MBALLOC);
|
||||
break;
|
||||
case Opt_stripe:
|
||||
if (match_int(&args[0], &option))
|
||||
return 0;
|
||||
if (option < 0)
|
||||
return 0;
|
||||
sbi->s_stripe = option;
|
||||
break;
|
||||
default:
|
||||
printk (KERN_ERR
|
||||
"EXT4-fs: Unrecognized mount option \"%s\" "
|
||||
|
@ -1742,6 +1762,34 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
|
|||
return (has_super + ext4_group_first_block_no(sb, bg));
|
||||
}
|
||||
|
||||
/**
|
||||
* ext4_get_stripe_size: Get the stripe size.
|
||||
* @sbi: In memory super block info
|
||||
*
|
||||
* If we have specified it via mount option, then
|
||||
* use the mount option value. If the value specified at mount time is
|
||||
* greater than the blocks per group use the super block value.
|
||||
* If the super block value is greater than blocks per group return 0.
|
||||
* Allocator needs it be less than blocks per group.
|
||||
*
|
||||
*/
|
||||
static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
|
||||
{
|
||||
unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
|
||||
unsigned long stripe_width =
|
||||
le32_to_cpu(sbi->s_es->s_raid_stripe_width);
|
||||
|
||||
if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
|
||||
return sbi->s_stripe;
|
||||
|
||||
if (stripe_width <= sbi->s_blocks_per_group)
|
||||
return stripe_width;
|
||||
|
||||
if (stride <= sbi->s_blocks_per_group)
|
||||
return stride;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ext4_fill_super (struct super_block *sb, void *data, int silent)
|
||||
__releases(kernel_sem)
|
||||
|
@ -2091,6 +2139,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
|
|||
sbi->s_rsv_window_head.rsv_goal_size = 0;
|
||||
ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
|
||||
|
||||
sbi->s_stripe = ext4_get_stripe_size(sbi);
|
||||
|
||||
/*
|
||||
* set up enough so that it can read an inode
|
||||
*/
|
||||
|
@ -2250,6 +2300,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
|
|||
"writeback");
|
||||
|
||||
ext4_ext_init(sb);
|
||||
ext4_mb_init(sb, needs_recovery);
|
||||
|
||||
lock_kernel();
|
||||
return 0;
|
||||
|
@ -3232,9 +3283,15 @@ static struct file_system_type ext4dev_fs_type = {
|
|||
|
||||
static int __init init_ext4_fs(void)
|
||||
{
|
||||
int err = init_ext4_xattr();
|
||||
int err;
|
||||
|
||||
err = init_ext4_mballoc();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = init_ext4_xattr();
|
||||
if (err)
|
||||
goto out2;
|
||||
err = init_inodecache();
|
||||
if (err)
|
||||
goto out1;
|
||||
|
@ -3246,6 +3303,8 @@ static int __init init_ext4_fs(void)
|
|||
destroy_inodecache();
|
||||
out1:
|
||||
exit_ext4_xattr();
|
||||
out2:
|
||||
exit_ext4_mballoc();
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -3254,6 +3313,7 @@ static void __exit exit_ext4_fs(void)
|
|||
unregister_filesystem(&ext4dev_fs_type);
|
||||
destroy_inodecache();
|
||||
exit_ext4_xattr();
|
||||
exit_ext4_mballoc();
|
||||
}
|
||||
|
||||
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
|
||||
|
|
|
@ -480,7 +480,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
|
|||
ea_bdebug(bh, "refcount now=0; freeing");
|
||||
if (ce)
|
||||
mb_cache_entry_free(ce);
|
||||
ext4_free_blocks(handle, inode, bh->b_blocknr, 1);
|
||||
ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
|
||||
get_bh(bh);
|
||||
ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
|
||||
} else {
|
||||
|
@ -821,7 +821,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
|
|||
new_bh = sb_getblk(sb, block);
|
||||
if (!new_bh) {
|
||||
getblk_failed:
|
||||
ext4_free_blocks(handle, inode, block, 1);
|
||||
ext4_free_blocks(handle, inode, block, 1, 1);
|
||||
error = -EIO;
|
||||
goto cleanup;
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include <linux/blkdev.h>
|
||||
#include <linux/magic.h>
|
||||
|
||||
#include <linux/ext4_fs_i.h>
|
||||
|
||||
/*
|
||||
* The second extended filesystem constants/structures
|
||||
*/
|
||||
|
@ -51,6 +53,50 @@
|
|||
#define ext4_debug(f, a...) do {} while (0)
|
||||
#endif
|
||||
|
||||
#define EXT4_MULTIBLOCK_ALLOCATOR 1
|
||||
|
||||
/* prefer goal again. length */
|
||||
#define EXT4_MB_HINT_MERGE 1
|
||||
/* blocks already reserved */
|
||||
#define EXT4_MB_HINT_RESERVED 2
|
||||
/* metadata is being allocated */
|
||||
#define EXT4_MB_HINT_METADATA 4
|
||||
/* first blocks in the file */
|
||||
#define EXT4_MB_HINT_FIRST 8
|
||||
/* search for the best chunk */
|
||||
#define EXT4_MB_HINT_BEST 16
|
||||
/* data is being allocated */
|
||||
#define EXT4_MB_HINT_DATA 32
|
||||
/* don't preallocate (for tails) */
|
||||
#define EXT4_MB_HINT_NOPREALLOC 64
|
||||
/* allocate for locality group */
|
||||
#define EXT4_MB_HINT_GROUP_ALLOC 128
|
||||
/* allocate goal blocks or none */
|
||||
#define EXT4_MB_HINT_GOAL_ONLY 256
|
||||
/* goal is meaningful */
|
||||
#define EXT4_MB_HINT_TRY_GOAL 512
|
||||
|
||||
struct ext4_allocation_request {
|
||||
/* target inode for block we're allocating */
|
||||
struct inode *inode;
|
||||
/* logical block in target inode */
|
||||
ext4_lblk_t logical;
|
||||
/* phys. target (a hint) */
|
||||
ext4_fsblk_t goal;
|
||||
/* the closest logical allocated block to the left */
|
||||
ext4_lblk_t lleft;
|
||||
/* phys. block for ^^^ */
|
||||
ext4_fsblk_t pleft;
|
||||
/* the closest logical allocated block to the right */
|
||||
ext4_lblk_t lright;
|
||||
/* phys. block for ^^^ */
|
||||
ext4_fsblk_t pright;
|
||||
/* how many blocks we want to allocate */
|
||||
unsigned long len;
|
||||
/* flags. see above EXT4_MB_HINT_* */
|
||||
unsigned long flags;
|
||||
};
|
||||
|
||||
/*
|
||||
* Special inodes numbers
|
||||
*/
|
||||
|
@ -474,6 +520,7 @@ do { \
|
|||
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
|
||||
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
|
||||
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
|
||||
#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
|
||||
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
|
||||
#ifndef _LINUX_EXT2_FS_H
|
||||
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
|
||||
|
@ -912,7 +959,7 @@ extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
|
|||
extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
|
||||
ext4_fsblk_t goal, unsigned long *count, int *errp);
|
||||
extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
|
||||
ext4_fsblk_t block, unsigned long count);
|
||||
ext4_fsblk_t block, unsigned long count, int metadata);
|
||||
extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
|
||||
ext4_fsblk_t block, unsigned long count,
|
||||
unsigned long *pdquot_freed_blocks);
|
||||
|
@ -950,6 +997,20 @@ extern unsigned long ext4_count_dirs (struct super_block *);
|
|||
extern void ext4_check_inodes_bitmap (struct super_block *);
|
||||
extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
|
||||
|
||||
/* mballoc.c */
|
||||
extern long ext4_mb_stats;
|
||||
extern long ext4_mb_max_to_scan;
|
||||
extern int ext4_mb_init(struct super_block *, int);
|
||||
extern int ext4_mb_release(struct super_block *);
|
||||
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
|
||||
struct ext4_allocation_request *, int *);
|
||||
extern int ext4_mb_reserve_blocks(struct super_block *, int);
|
||||
extern void ext4_mb_discard_inode_preallocations(struct inode *);
|
||||
extern int __init init_ext4_mballoc(void);
|
||||
extern void exit_ext4_mballoc(void);
|
||||
extern void ext4_mb_free_blocks(handle_t *, struct inode *,
|
||||
unsigned long, unsigned long, int, unsigned long *);
|
||||
|
||||
|
||||
/* inode.c */
|
||||
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
|
||||
|
@ -1080,6 +1141,19 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
|
|||
raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
|
||||
}
|
||||
|
||||
static inline
|
||||
struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
|
||||
ext4_group_t group)
|
||||
{
|
||||
struct ext4_group_info ***grp_info;
|
||||
long indexv, indexh;
|
||||
grp_info = EXT4_SB(sb)->s_group_info;
|
||||
indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
|
||||
indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
|
||||
return grp_info[indexv][indexh];
|
||||
}
|
||||
|
||||
|
||||
#define ext4_std_error(sb, errno) \
|
||||
do { \
|
||||
if ((errno)) \
|
||||
|
|
|
@ -158,6 +158,10 @@ struct ext4_inode_info {
|
|||
* struct timespec i_{a,c,m}time in the generic inode.
|
||||
*/
|
||||
struct timespec i_crtime;
|
||||
|
||||
/* mballoc */
|
||||
struct list_head i_prealloc_list;
|
||||
spinlock_t i_prealloc_lock;
|
||||
};
|
||||
|
||||
#endif /* _LINUX_EXT4_FS_I */
|
||||
|
|
|
@ -91,6 +91,58 @@ struct ext4_sb_info {
|
|||
unsigned long s_ext_blocks;
|
||||
unsigned long s_ext_extents;
|
||||
#endif
|
||||
|
||||
/* for buddy allocator */
|
||||
struct ext4_group_info ***s_group_info;
|
||||
struct inode *s_buddy_cache;
|
||||
long s_blocks_reserved;
|
||||
spinlock_t s_reserve_lock;
|
||||
struct list_head s_active_transaction;
|
||||
struct list_head s_closed_transaction;
|
||||
struct list_head s_committed_transaction;
|
||||
spinlock_t s_md_lock;
|
||||
tid_t s_last_transaction;
|
||||
unsigned short *s_mb_offsets, *s_mb_maxs;
|
||||
|
||||
/* tunables */
|
||||
unsigned long s_stripe;
|
||||
unsigned long s_mb_stream_request;
|
||||
unsigned long s_mb_max_to_scan;
|
||||
unsigned long s_mb_min_to_scan;
|
||||
unsigned long s_mb_stats;
|
||||
unsigned long s_mb_order2_reqs;
|
||||
unsigned long s_mb_group_prealloc;
|
||||
/* where last allocation was done - for stream allocation */
|
||||
unsigned long s_mb_last_group;
|
||||
unsigned long s_mb_last_start;
|
||||
|
||||
/* history to debug policy */
|
||||
struct ext4_mb_history *s_mb_history;
|
||||
int s_mb_history_cur;
|
||||
int s_mb_history_max;
|
||||
int s_mb_history_num;
|
||||
struct proc_dir_entry *s_mb_proc;
|
||||
spinlock_t s_mb_history_lock;
|
||||
int s_mb_history_filter;
|
||||
|
||||
/* stats for buddy allocator */
|
||||
spinlock_t s_mb_pa_lock;
|
||||
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
|
||||
atomic_t s_bal_success; /* we found long enough chunks */
|
||||
atomic_t s_bal_allocated; /* in blocks */
|
||||
atomic_t s_bal_ex_scanned; /* total extents scanned */
|
||||
atomic_t s_bal_goals; /* goal hits */
|
||||
atomic_t s_bal_breaks; /* too long searches */
|
||||
atomic_t s_bal_2orders; /* 2^order hits */
|
||||
spinlock_t s_bal_lock;
|
||||
unsigned long s_mb_buddies_generated;
|
||||
unsigned long long s_mb_generation_time;
|
||||
atomic_t s_mb_lost_chunks;
|
||||
atomic_t s_mb_preallocated;
|
||||
atomic_t s_mb_discarded;
|
||||
|
||||
/* locality groups */
|
||||
struct ext4_locality_group *s_locality_groups;
|
||||
};
|
||||
|
||||
#endif /* _LINUX_EXT4_FS_SB */
|
||||
|
|
Loading…
Reference in New Issue