ext4: implement allocation of pre-zeroed blocks
The DAX page fault path needs to get blocks that are pre-zeroed, to avoid races when two concurrent page faults happen in the same block of a file. Implement support for this in ext4_map_blocks(). Signed-off-by: Jan Kara <jack@suse.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
parent
53085fac02
commit
c86d8db33a
|
@ -557,6 +557,10 @@ enum {
|
|||
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
|
||||
/* Convert written extents to unwritten */
|
||||
#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100
|
||||
/* Write zeros to newly created written extents */
|
||||
#define EXT4_GET_BLOCKS_ZERO 0x0200
|
||||
#define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
|
||||
EXT4_GET_BLOCKS_ZERO)
|
||||
|
||||
/*
|
||||
* The bit position of these flags must not overlap with any of the
|
||||
|
|
|
@ -4044,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
|
|||
}
|
||||
/* IO end_io complete, convert the filled extent to written */
|
||||
if (flags & EXT4_GET_BLOCKS_CONVERT) {
|
||||
if (flags & EXT4_GET_BLOCKS_ZERO) {
|
||||
if (allocated > map->m_len)
|
||||
allocated = map->m_len;
|
||||
err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
|
||||
allocated);
|
||||
if (err < 0)
|
||||
goto out2;
|
||||
}
|
||||
ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
|
||||
ppath);
|
||||
if (ret >= 0) {
|
||||
|
|
|
@ -636,6 +636,22 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
|||
WARN_ON(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* We have to zeroout blocks before inserting them into extent
|
||||
* status tree. Otherwise someone could look them up there and
|
||||
* use them before they are really zeroed.
|
||||
*/
|
||||
if (flags & EXT4_GET_BLOCKS_ZERO &&
|
||||
map->m_flags & EXT4_MAP_MAPPED &&
|
||||
map->m_flags & EXT4_MAP_NEW) {
|
||||
ret = ext4_issue_zeroout(inode, map->m_lblk,
|
||||
map->m_pblk, map->m_len);
|
||||
if (ret) {
|
||||
retval = ret;
|
||||
goto out_sem;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the extent has been zeroed out, we don't need to update
|
||||
* extent status tree.
|
||||
|
@ -643,7 +659,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
|||
if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
|
||||
ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
|
||||
if (ext4_es_is_written(&es))
|
||||
goto has_zeroout;
|
||||
goto out_sem;
|
||||
}
|
||||
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
|
||||
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
|
||||
|
@ -654,11 +670,13 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
|||
status |= EXTENT_STATUS_DELAYED;
|
||||
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
||||
map->m_pblk, status);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
retval = ret;
|
||||
goto out_sem;
|
||||
}
|
||||
}
|
||||
|
||||
has_zeroout:
|
||||
out_sem:
|
||||
up_write((&EXT4_I(inode)->i_data_sem));
|
||||
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
|
||||
ret = check_block_validity(inode, map);
|
||||
|
@ -3083,6 +3101,7 @@ int ext4_get_block_dax(struct inode *inode, sector_t iblock,
|
|||
struct buffer_head *bh_result, int create)
|
||||
{
|
||||
int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
|
||||
|
||||
if (create)
|
||||
flags |= EXT4_GET_BLOCKS_CREATE;
|
||||
ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
|
||||
|
|
|
@ -42,7 +42,8 @@ struct extent_status;
|
|||
{ EXT4_GET_BLOCKS_CONVERT, "CONVERT" }, \
|
||||
{ EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \
|
||||
{ EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \
|
||||
{ EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" })
|
||||
{ EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \
|
||||
{ EXT4_GET_BLOCKS_ZERO, "ZERO" })
|
||||
|
||||
#define show_mflags(flags) __print_flags(flags, "", \
|
||||
{ EXT4_MAP_NEW, "N" }, \
|
||||
|
|
Loading…
Reference in New Issue