Merge branch 'xfs-4.8-iomap-write' into for-next
commit 9b7fad2076
@@ -4,6 +4,7 @@ config XFS_FS
 	depends on (64BIT || LBDAF)
 	select EXPORTFS
 	select LIBCRC32C
+	select FS_IOMAP
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform.  It is completely multi-threaded, can

@@ -1143,6 +1143,8 @@ __xfs_get_blocks(
 	ssize_t			size;
 	int			new = 0;
 
+	BUG_ON(create && !direct);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
@@ -1150,22 +1152,14 @@ __xfs_get_blocks(
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
 
-	if (!create && direct && offset >= i_size_read(inode))
+	if (!create && offset >= i_size_read(inode))
 		return 0;
 
 	/*
 	 * Direct I/O is usually done on preallocated files, so try getting
-	 * a block mapping without an exclusive lock first. For buffered
-	 * writes we already have the exclusive iolock anyway, so avoiding
-	 * a lock roundtrip here by taking the ilock exclusive from the
-	 * beginning is a useful micro optimization.
+	 * a block mapping without an exclusive lock first.
 	 */
-	if (create && !direct) {
-		lockmode = XFS_ILOCK_EXCL;
-		xfs_ilock(ip, lockmode);
-	} else {
-		lockmode = xfs_ilock_data_map_shared(ip);
-	}
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if (offset + size > mp->m_super->s_maxbytes)
@@ -1184,37 +1178,19 @@ __xfs_get_blocks(
 	     (imap.br_startblock == HOLESTARTBLOCK ||
 	      imap.br_startblock == DELAYSTARTBLOCK) ||
 	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-		if (direct || xfs_get_extsz_hint(ip)) {
-			/*
-			 * xfs_iomap_write_direct() expects the shared lock. It
-			 * is unlocked on return.
-			 */
-			if (lockmode == XFS_ILOCK_EXCL)
-				xfs_ilock_demote(ip, lockmode);
+		/*
+		 * xfs_iomap_write_direct() expects the shared lock. It
+		 * is unlocked on return.
+		 */
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 
-			error = xfs_iomap_write_direct(ip, offset, size,
-						       &imap, nimaps);
-			if (error)
-				return error;
-			new = 1;
+		error = xfs_iomap_write_direct(ip, offset, size,
+					       &imap, nimaps);
+		if (error)
+			return error;
+		new = 1;
 
-		} else {
-			/*
-			 * Delalloc reservations do not require a transaction,
-			 * we can go on without dropping the lock here. If we
-			 * are allocating a new delalloc block, make sure that
-			 * we set the new flag so that we mark the buffer new so
-			 * that we know that it is newly allocated if the write
-			 * fails.
-			 */
-			if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
-				new = 1;
-			error = xfs_iomap_write_delay(ip, offset, size, &imap);
-			if (error)
-				goto out_unlock;
-
-			xfs_iunlock(ip, lockmode);
-		}
 		trace_xfs_get_blocks_alloc(ip, offset, size,
 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
 						   : XFS_IO_DELALLOC, &imap);
@@ -1235,9 +1211,7 @@ __xfs_get_blocks(
 	}
 
 	/* trim mapping down to size requested */
-	if (direct || size > (1 << inode->i_blkbits))
-		xfs_map_trim_size(inode, iblock, bh_result,
-				  &imap, offset, size);
+	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
 	/*
 	 * For unwritten extents do not report a disk address in the buffered
@@ -1250,7 +1224,7 @@ __xfs_get_blocks(
 	if (ISUNWRITTEN(&imap))
 		set_buffer_unwritten(bh_result);
 	/* direct IO needs special help */
-	if (create && direct) {
+	if (create) {
 		if (dax_fault)
 			ASSERT(!ISUNWRITTEN(&imap));
 		else
|
@ -1279,14 +1253,7 @@ __xfs_get_blocks(
|
|||
(new || ISUNWRITTEN(&imap))))
|
||||
set_buffer_new(bh_result);
|
||||
|
||||
if (imap.br_startblock == DELAYSTARTBLOCK) {
|
||||
BUG_ON(direct);
|
||||
if (create) {
|
||||
set_buffer_uptodate(bh_result);
|
||||
set_buffer_mapped(bh_result);
|
||||
set_buffer_delay(bh_result);
|
||||
}
|
||||
}
|
||||
BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
|
||||
|
||||
return 0;
|
||||
|
||||
|
@@ -1427,216 +1394,6 @@ xfs_vm_direct_IO(
 			xfs_get_blocks_direct, endio, NULL, flags);
 }
 
-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fileoff_t		start_fsb;
-	xfs_fileoff_t		end_fsb;
-	int			error;
-
-	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
-	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
-	if (end_fsb <= start_fsb)
-		return;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-						end_fsb - start_fsb);
-	if (error) {
-		/* something screwed, just bail */
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			xfs_alert(ip->i_mount,
-		"xfs_vm_write_failed: unable to clean up ino %lld",
-					ip->i_ino);
-		}
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			pos,
-	unsigned		len)
-{
-	loff_t			block_offset;
-	loff_t			block_start;
-	loff_t			block_end;
-	loff_t			from = pos & (PAGE_SIZE - 1);
-	loff_t			to = from + len;
-	struct buffer_head	*bh, *head;
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-
-	/*
-	 * The request pos offset might be 32 or 64 bit, this is all fine
-	 * on 64-bit platform.  However, for 64-bit pos request on 32-bit
-	 * platform, the high 32-bit will be masked off if we evaluate the
-	 * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
-	 * 0xfffff000 as an unsigned long, hence the result is incorrect
-	 * which could cause the following ASSERT failed in most cases.
-	 * In order to avoid this, we can evaluate the block_offset of the
-	 * start of the page by using shifts rather than masks the mismatch
-	 * problem.
-	 */
-	block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	ASSERT(block_offset + from == pos);
-
-	head = page_buffers(page);
-	block_start = 0;
-	for (bh = head; bh != head || !block_start;
-	     bh = bh->b_this_page, block_start = block_end,
-				   block_offset += bh->b_size) {
-		block_end = block_start + bh->b_size;
-
-		/* skip buffers before the write */
-		if (block_end <= from)
-			continue;
-
-		/* if the buffer is after the write, we're done */
-		if (block_start >= to)
-			break;
-
-		/*
-		 * Process delalloc and unwritten buffers beyond EOF. We can
-		 * encounter unwritten buffers in the event that a file has
-		 * post-EOF unwritten extents and an extending write happens to
-		 * fail (e.g., an unaligned write that also involves a delalloc
-		 * to the same page).
-		 */
-		if (!buffer_delay(bh) && !buffer_unwritten(bh))
-			continue;
-
-		if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
-		    block_offset < i_size_read(inode))
-			continue;
-
-		if (buffer_delay(bh))
-			xfs_vm_kill_delalloc_range(inode, block_offset,
-						   block_offset + bh->b_size);
-
-		/*
-		 * This buffer does not contain data anymore. make sure anyone
-		 * who finds it knows that for certain.
-		 */
-		clear_buffer_delay(bh);
-		clear_buffer_uptodate(bh);
-		clear_buffer_mapped(bh);
-		clear_buffer_new(bh);
-		clear_buffer_dirty(bh);
-		clear_buffer_unwritten(bh);
-	}
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		flags,
-	struct page		**pagep,
-	void			**fsdata)
-{
-	pgoff_t			index = pos >> PAGE_SHIFT;
-	struct page		*page;
-	int			status;
-	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
-
-	status = __block_write_begin(page, pos, len, xfs_get_blocks);
-	if (xfs_mp_fail_writes(mp))
-		status = -EIO;
-	if (unlikely(status)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-
-		xfs_vm_write_failed(inode, page, pos, len);
-		unlock_page(page);
-
-		/*
-		 * If the write is beyond EOF, we only want to kill blocks
-		 * allocated in this write, not blocks that were previously
-		 * written successfully.
-		 */
-		if (xfs_mp_fail_writes(mp))
-			isize = 0;
-		if (pos + len > isize) {
-			ssize_t start = max_t(ssize_t, pos, isize);
-
-			truncate_pagecache_range(inode, start, pos + len);
-		}
-
-		put_page(page);
-		page = NULL;
-	}
-
-	*pagep = page;
-	return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		copied,
-	struct page		*page,
-	void			*fsdata)
-{
-	int			ret;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-		loff_t		to = pos + len;
-
-		if (to > isize) {
-			/* only kill blocks in this write beyond EOF */
-			if (pos > isize)
-				isize = pos;
-			xfs_vm_kill_delalloc_range(inode, isize, to);
-			truncate_pagecache_range(inode, isize, to);
-		}
-	}
-	return ret;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
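The comment removed above (in xfs_vm_write_failed) describes a subtle 32-bit pitfall: PAGE_MASK is an unsigned long, so masking a 64-bit file offset with it clears the high 32 bits on a 32-bit platform, while a shift pair preserves them. A stand-alone user-space sketch of the failure mode — PAGE_MASK_32 and the offsets below are local stand-ins for this example, not kernel definitions:

/* Demonstrates why the removed code used shifts instead of PAGE_MASK. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_MASK_32	((uint32_t)~((1u << PAGE_SHIFT) - 1))	/* 0xfffff000 */

int main(void)
{
	int64_t pos = 0x123456789abcLL;		/* a file offset above 4 GiB */

	/* Wrong on 32-bit: the mask is zero-extended, high bits are lost. */
	int64_t masked = pos & PAGE_MASK_32;

	/* What xfs_vm_write_failed() did instead: shift down, then up. */
	int64_t shifted = (pos >> PAGE_SHIFT) << PAGE_SHIFT;

	printf("masked  = 0x%llx\n", (unsigned long long)masked);   /* 0x56789000 */
	printf("shifted = 0x%llx\n", (unsigned long long)shifted);  /* 0x123456789000 */
	return 0;
}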
@@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.set_page_dirty		= xfs_vm_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.write_begin		= xfs_vm_write_begin,
-	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,

@@ -1087,99 +1087,120 @@ xfs_alloc_file_space(
 	return error;
 }
 
-/*
- * Zero file bytes between startoff and endoff inclusive.
- * The iolock is held exclusive and no blocks are buffered.
- *
- * This function is used by xfs_free_file_space() to zero
- * partial blocks when the range to free is not block aligned.
- * When unreserving space with boundaries that are not block
- * aligned we round up the start and round down the end
- * boundaries and then use this function to zero the parts of
- * the blocks that got dropped during the rounding.
- */
-STATIC int
-xfs_zero_remaining_bytes(
-	xfs_inode_t		*ip,
-	xfs_off_t		startoff,
-	xfs_off_t		endoff)
+static int
+xfs_unmap_extent(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		startoffset_fsb,
+	xfs_filblks_t		len_fsb,
+	int			*done)
 {
-	xfs_bmbt_irec_t		imap;
-	xfs_fileoff_t		offset_fsb;
-	xfs_off_t		lastoffset;
-	xfs_off_t		offset;
-	xfs_buf_t		*bp;
-	xfs_mount_t		*mp = ip->i_mount;
-	int			nimap;
-	int			error = 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	struct xfs_bmap_free	free_list;
+	xfs_fsblock_t		firstfsb;
+	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+	int			error;
 
-	/*
-	 * Avoid doing I/O beyond eof - it's not necessary
-	 * since nothing can read beyond eof.  The space will
-	 * be zeroed when the file is extended anyway.
-	 */
-	if (startoff >= XFS_ISIZE(ip))
-		return 0;
-
-	if (endoff > XFS_ISIZE(ip))
-		endoff = XFS_ISIZE(ip);
-
-	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
-		uint lock_mode;
-
-		offset_fsb = XFS_B_TO_FSBT(mp, offset);
-		nimap = 1;
-
-		lock_mode = xfs_ilock_data_map_shared(ip);
-		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
-		xfs_iunlock(ip, lock_mode);
-
-		if (error || nimap < 1)
-			break;
-		ASSERT(imap.br_blockcount >= 1);
-		ASSERT(imap.br_startoff == offset_fsb);
-		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-
-		if (imap.br_startblock == HOLESTARTBLOCK ||
-		    imap.br_state == XFS_EXT_UNWRITTEN) {
-			/* skip the entire extent */
-			lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff +
-						      imap.br_blockcount) - 1;
-			continue;
-		}
-
-		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
-		if (lastoffset > endoff)
-			lastoffset = endoff;
-
-		/* DAX can just zero the backing device directly */
-		if (IS_DAX(VFS_I(ip))) {
-			error = dax_zero_page_range(VFS_I(ip), offset,
-						    lastoffset - offset + 1,
-						    xfs_get_blocks_direct);
-			if (error)
-				return error;
-			continue;
-		}
-
-		error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
-				mp->m_rtdev_targp : mp->m_ddev_targp,
-				xfs_fsb_to_db(ip, imap.br_startblock),
-				BTOBB(mp->m_sb.sb_blocksize),
-				0, &bp, NULL);
-		if (error)
-			return error;
-
-		memset(bp->b_addr +
-				(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
-		       0, lastoffset - offset + 1);
-
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error)
-			return error;
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+	if (error) {
+		ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+		return error;
 	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot,
+			ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS);
+	if (error)
+		goto out_trans_cancel;
+
+	xfs_trans_ijoin(tp, ip, 0);
+
+	xfs_bmap_init(&free_list, &firstfsb);
+	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
+			&free_list, done);
+	if (error)
+		goto out_bmap_cancel;
+
+	error = xfs_bmap_finish(&tp, &free_list, NULL);
+	if (error)
+		goto out_bmap_cancel;
+
+	error = xfs_trans_commit(tp);
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
+
+out_bmap_cancel:
+	xfs_bmap_cancel(&free_list);
+out_trans_cancel:
+	xfs_trans_cancel(tp);
+	goto out_unlock;
+}
+
+static int
+xfs_adjust_extent_unmap_boundaries(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		*startoffset_fsb,
+	xfs_fileoff_t		*endoffset_fsb)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	int			nimap, error;
+	xfs_extlen_t		mod = 0;
+
+	nimap = 1;
+	error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
+	if (error)
+		return error;
+
+	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+		xfs_daddr_t	block;
+
+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+		block = imap.br_startblock;
+		mod = do_div(block, mp->m_sb.sb_rextsize);
+		if (mod)
+			*startoffset_fsb += mp->m_sb.sb_rextsize - mod;
+	}
+
+	nimap = 1;
+	error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
+	if (error)
+		return error;
+
+	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+		mod++;
+		if (mod && mod != mp->m_sb.sb_rextsize)
+			*endoffset_fsb -= mod;
+	}
+
+	return 0;
+}
+
+static int
+xfs_flush_unmap_range(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	xfs_off_t		len)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct inode		*inode = VFS_I(ip);
+	xfs_off_t		rounding, start, end;
+	int			error;
+
+	/* wait for the completion of any pending DIOs */
+	inode_dio_wait(inode);
+
+	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
+	start = round_down(offset, rounding);
+	end = round_up(offset + len, rounding) - 1;
+
+	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (error)
+		return error;
+	truncate_pagecache_range(inode, start, end);
+	return 0;
+}
 
 int
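The new xfs_flush_unmap_range() helper widens the writeback/invalidation window to the larger of the filesystem block size and the page size before flushing, so partially covered pages and blocks are written back too. A runnable user-space sketch of that rounding, using local stand-ins for the kernel's round_down()/round_up() and max_t():

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL

static uint64_t round_down_u64(uint64_t x, uint64_t y) { return x - (x % y); }
static uint64_t round_up_u64(uint64_t x, uint64_t y)
{
	return round_down_u64(x + y - 1, y);
}

int main(void)
{
	uint64_t blocksize = 1024;	/* 1 << sb_blocklog, for example */
	uint64_t offset = 5000, len = 3000;

	/* widen to whichever granularity is coarser */
	uint64_t rounding = blocksize > PAGE_SIZE ? blocksize : PAGE_SIZE;
	uint64_t start = round_down_u64(offset, rounding);
	uint64_t end = round_up_u64(offset + len, rounding) - 1;

	/* flush [4096, 8191] for this example */
	printf("flush [%llu, %llu]\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}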
@@ -1188,24 +1209,10 @@ xfs_free_file_space(
 	xfs_off_t		offset,
 	xfs_off_t		len)
 {
-	int			done;
-	xfs_fileoff_t		endoffset_fsb;
-	int			error;
-	xfs_fsblock_t		firstfsb;
-	xfs_bmap_free_t		free_list;
-	xfs_bmbt_irec_t		imap;
-	xfs_off_t		ioffset;
-	xfs_off_t		iendoffset;
-	xfs_extlen_t		mod=0;
-	xfs_mount_t		*mp;
-	int			nimap;
-	uint			resblks;
-	xfs_off_t		rounding;
-	int			rt;
+	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		startoffset_fsb;
-	xfs_trans_t		*tp;
-
-	mp = ip->i_mount;
+	xfs_fileoff_t		endoffset_fsb;
+	int			done = 0, error;
 
 	trace_xfs_free_file_space(ip);
 
@@ -1213,135 +1220,45 @@ xfs_free_file_space(
 	if (error)
 		return error;
 
-	error = 0;
 	if (len <= 0)	/* if nothing being freed */
 		return 0;
 
-	rt = XFS_IS_REALTIME_INODE(ip);
-	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
+	error = xfs_flush_unmap_range(ip, offset, len);
+	if (error)
+		return error;
 
+	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
 	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
 
-	/* wait for the completion of any pending DIOs */
-	inode_dio_wait(VFS_I(ip));
-
-	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
-	ioffset = round_down(offset, rounding);
-	iendoffset = round_up(offset + len, rounding) - 1;
-	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
-					     iendoffset);
-	if (error)
-		goto out;
-	truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset);
-
 	/*
-	 * Need to zero the stuff we're not freeing, on disk.
-	 * If it's a realtime file & can't use unwritten extents then we
-	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
-	 * will take care of it for us.
+	 * Need to zero the stuff we're not freeing, on disk.  If it's a RT file
+	 * and we can't use unwritten extents then we actually need to ensure
+	 * to zero the whole extent, otherwise we just need to take of block
+	 * boundaries, and xfs_bunmapi will handle the rest.
 	 */
-	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
-		nimap = 1;
-		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
-					&imap, &nimap, 0);
+	if (XFS_IS_REALTIME_INODE(ip) &&
+	    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+		error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
+				&endoffset_fsb);
 		if (error)
-			goto out;
-		ASSERT(nimap == 0 || nimap == 1);
-		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-			xfs_daddr_t	block;
-
-			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-			block = imap.br_startblock;
-			mod = do_div(block, mp->m_sb.sb_rextsize);
-			if (mod)
-				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
-		}
-		nimap = 1;
-		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
-					&imap, &nimap, 0);
-		if (error)
-			goto out;
-		ASSERT(nimap == 0 || nimap == 1);
-		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-			mod++;
-			if (mod && (mod != mp->m_sb.sb_rextsize))
-				endoffset_fsb -= mod;
-		}
+			return error;
 	}
-	if ((done = (endoffset_fsb <= startoffset_fsb)))
-		/*
-		 * One contiguous piece to clear
-		 */
-		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
-	else {
-		/*
-		 * Some full blocks, possibly two pieces to clear
-		 */
-		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
-			error = xfs_zero_remaining_bytes(ip, offset,
-				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
-		if (!error &&
-		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
-			error = xfs_zero_remaining_bytes(ip,
-				XFS_FSB_TO_B(mp, endoffset_fsb),
-				offset + len - 1);
+
+	if (endoffset_fsb > startoffset_fsb) {
+		while (!done) {
+			error = xfs_unmap_extent(ip, startoffset_fsb,
+					endoffset_fsb - startoffset_fsb, &done);
+			if (error)
+				return error;
+		}
 	}
 
 	/*
-	 * free file space until done or until there is an error
+	 * Now that we've unmap all full blocks we'll have to zero out any
+	 * partial block at the beginning and/or end.  xfs_zero_range is
+	 * smart enough to skip any holes, including those we just created.
 	 */
-	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-	while (!error && !done) {
-
-		/*
-		 * allocate and setup the transaction. Allow this
-		 * transaction to dip into the reserve blocks to ensure
-		 * the freeing of the space succeeds at ENOSPC.
-		 */
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
-				&tp);
-		if (error) {
-			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-			break;
-		}
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_trans_reserve_quota(tp, mp,
-				ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
-				resblks, 0, XFS_QMOPT_RES_REGBLKS);
-		if (error)
-			goto error1;
-
-		xfs_trans_ijoin(tp, ip, 0);
-
-		/*
-		 * issue the bunmapi() call to free the blocks
-		 */
-		xfs_bmap_init(&free_list, &firstfsb);
-		error = xfs_bunmapi(tp, ip, startoffset_fsb,
-				  endoffset_fsb - startoffset_fsb,
-				  0, 2, &firstfsb, &free_list, &done);
-		if (error)
-			goto error0;
-
-		/*
-		 * complete the transaction
-		 */
-		error = xfs_bmap_finish(&tp, &free_list, NULL);
-		if (error)
-			goto error0;
-
-		error = xfs_trans_commit(tp);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	}
-
- out:
-	return error;
-
- error0:
-	xfs_bmap_cancel(&free_list);
- error1:
-	xfs_trans_cancel(tp);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	goto out;
+	return xfs_zero_range(ip, offset, len, NULL);
 }
 
 /*
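As the hunk above shows, xfs_free_file_space() now unmaps only whole blocks and leaves the unaligned edges to xfs_zero_range(). The split falls out of the two conversion macros: XFS_B_TO_FSB rounds the start byte up to the next block, XFS_B_TO_FSBT truncates the end byte down. A user-space sketch with plain-C stand-ins for those macros (the constants are arbitrary example values):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t blklog = 12;			/* 4096-byte blocks */
	uint64_t offset = 6000, len = 20000;

	/* XFS_B_TO_FSB: round the byte offset up to the next block. */
	uint64_t start_fsb = (offset + (1ULL << blklog) - 1) >> blklog;
	/* XFS_B_TO_FSBT: truncate the byte offset down to a block. */
	uint64_t end_fsb = (offset + len) >> blklog;

	uint64_t start_b = start_fsb << blklog;
	uint64_t end_b = end_fsb << blklog;

	if (end_fsb > start_fsb)	/* blocks [2, 6) here */
		printf("unmap blocks [%llu, %llu)\n",
		       (unsigned long long)start_fsb,
		       (unsigned long long)end_fsb);
	if (offset < start_b)		/* leading edge, zeroed instead */
		printf("zero [%llu, %llu)\n",
		       (unsigned long long)offset, (unsigned long long)start_b);
	if (offset + len > end_b)	/* trailing edge, zeroed instead */
		printf("zero [%llu, %llu)\n",
		       (unsigned long long)end_b,
		       (unsigned long long)(offset + len));
	return 0;
}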
@@ -37,6 +37,7 @@
 #include "xfs_log.h"
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
 
@@ -80,61 +81,17 @@ xfs_rw_ilock_demote(
 }
 
 /*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
  */
 int
-xfs_iozero(
-	struct xfs_inode	*ip,	/* inode			*/
-	loff_t			pos,	/* offset in file		*/
-	size_t			count)	/* size of data to zero		*/
+xfs_zero_range(
+	struct xfs_inode	*ip,
+	xfs_off_t		pos,
+	xfs_off_t		count,
+	bool			*did_zero)
 {
-	struct page		*page;
-	struct address_space	*mapping;
-	int			status = 0;
-
-
-	mapping = VFS_I(ip)->i_mapping;
-	do {
-		unsigned offset, bytes;
-		void *fsdata;
-
-		offset = (pos & (PAGE_SIZE -1)); /* Within page */
-		bytes = PAGE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
-
-		if (IS_DAX(VFS_I(ip))) {
-			status = dax_zero_page_range(VFS_I(ip), pos, bytes,
-						     xfs_get_blocks_direct);
-			if (status)
-				break;
-		} else {
-			status = pagecache_write_begin(NULL, mapping, pos, bytes,
-						AOP_FLAG_UNINTERRUPTIBLE,
-						&page, &fsdata);
-			if (status)
-				break;
-
-			zero_user(page, offset, bytes);
-
-			status = pagecache_write_end(NULL, mapping, pos, bytes,
-						bytes, page, fsdata);
-			WARN_ON(status <= 0); /* can't return less than zero! */
-			status = 0;
-		}
-		pos += bytes;
-		count -= bytes;
-	} while (count);
-
-	return status;
+	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
 }
 
 int
@@ -423,49 +380,6 @@ xfs_file_splice_read(
 	return ret;
 }
 
-/*
- * This routine is called to handle zeroing any space in the last block of the
- * file that is beyond the EOF.  We do this since the size is being increased
- * without writing anything to that block and we don't want to read the
- * garbage on the disk.
- */
-STATIC int				/* error (positive) */
-xfs_zero_last_block(
-	struct xfs_inode	*ip,
-	xfs_fsize_t		offset,
-	xfs_fsize_t		isize,
-	bool			*did_zeroing)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		last_fsb = XFS_B_TO_FSBT(mp, isize);
-	int			zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-	int			zero_len;
-	int			nimaps = 1;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	if (error)
-		return error;
-
-	ASSERT(nimaps > 0);
-
-	/*
-	 * If the block underlying isize is just a hole, then there
-	 * is nothing to zero.
-	 */
-	if (imap.br_startblock == HOLESTARTBLOCK)
-		return 0;
-
-	zero_len = mp->m_sb.sb_blocksize - zero_offset;
-	if (isize + zero_len > offset)
-		zero_len = offset - isize;
-	*did_zeroing = true;
-	return xfs_iozero(ip, isize, zero_len);
-}
-
 /*
  * Zero any on disk space between the current EOF and the new, larger EOF.
  *
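The removed xfs_zero_last_block() zeroed from the old EOF to the end of its block, clamped at the new size; iomap_zero_range() now subsumes this case. A runnable user-space sketch of that clamping arithmetic, with local stand-ins for the XFS macros (the sizes below are example values):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t blocksize = 4096;
	uint64_t isize = 10000;		/* old EOF */
	uint64_t offset = 11000;	/* new, larger size */

	/* XFS_B_FSB_OFFSET: byte offset of the old EOF within its block */
	uint64_t zero_offset = isize % blocksize;
	uint64_t zero_len = blocksize - zero_offset;

	/* don't zero past the new size */
	if (isize + zero_len > offset)
		zero_len = offset - isize;

	/* prints "zero 1000 bytes at offset 10000" */
	printf("zero %llu bytes at offset %llu\n",
	       (unsigned long long)zero_len, (unsigned long long)isize);
	return 0;
}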
@@ -484,94 +398,11 @@ xfs_zero_eof(
 	xfs_fsize_t		isize,		/* current inode size */
 	bool			*did_zeroing)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		start_zero_fsb;
-	xfs_fileoff_t		end_zero_fsb;
-	xfs_fileoff_t		zero_count_fsb;
-	xfs_fileoff_t		last_fsb;
-	xfs_fileoff_t		zero_off;
-	xfs_fsize_t		zero_len;
-	int			nimaps;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(offset > isize);
 
 	trace_xfs_zero_eof(ip, isize, offset - isize);
-
-	/*
-	 * First handle zeroing the block on which isize resides.
-	 *
-	 * We only zero a part of that block so it is handled specially.
-	 */
-	if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
-		error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
-		if (error)
-			return error;
-	}
-
-	/*
-	 * Calculate the range between the new size and the old where blocks
-	 * needing to be zeroed may exist.
-	 *
-	 * To get the block where the last byte in the file currently resides,
-	 * we need to subtract one from the size and truncate back to a block
-	 * boundary.  We subtract 1 in case the size is exactly on a block
-	 * boundary.
-	 */
-	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
-	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
-	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
-	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
-	if (last_fsb == end_zero_fsb) {
-		/*
-		 * The size was only incremented on its last block.
-		 * We took care of that above, so just return.
-		 */
-		return 0;
-	}
-
-	ASSERT(start_zero_fsb <= end_zero_fsb);
-	while (start_zero_fsb <= end_zero_fsb) {
-		nimaps = 1;
-		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
-					  &imap, &nimaps, 0);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		if (error)
-			return error;
-
-		ASSERT(nimaps > 0);
-
-		if (imap.br_state == XFS_EXT_UNWRITTEN ||
-		    imap.br_startblock == HOLESTARTBLOCK) {
-			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-			continue;
-		}
-
-		/*
-		 * There are blocks we need to zero.
-		 */
-		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
-		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-		if ((zero_off + zero_len) > offset)
-			zero_len = offset - zero_off;
-
-		error = xfs_iozero(ip, zero_off, zero_len);
-		if (error)
-			return error;
-
-		*did_zeroing = true;
-		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-	}
-
-	return 0;
+	return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
 }
 
 /*
@@ -841,7 +672,7 @@ xfs_file_buffered_aio_write(
 write_retry:
 	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
 				      iocb->ki_pos, 0);
-	ret = generic_perform_write(file, from, iocb->ki_pos);
+	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
 	if (likely(ret >= 0))
 		iocb->ki_pos += ret;
 
@@ -1553,7 +1384,7 @@ xfs_filemap_page_mkwrite(
 	if (IS_DAX(inode)) {
 		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 	} else {
-		ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
 	}
 
@@ -427,7 +427,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
 			     enum xfs_prealloc_flags flags);
 int	xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
 		     xfs_fsize_t isize, bool *did_zeroing);
-int	xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+int	xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
+		bool *did_zero);
 loff_t	__xfs_seek_hole_data(struct inode *inode, loff_t start,
 				     loff_t eof, int whence);
 
@@ -15,6 +15,7 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include <linux/iomap.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
@@ -940,3 +941,173 @@ xfs_iomap_write_unwritten(
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
+
+void
+xfs_bmbt_to_iomap(
+	struct xfs_inode	*ip,
+	struct iomap		*iomap,
+	struct xfs_bmbt_irec	*imap)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (imap->br_startblock == HOLESTARTBLOCK) {
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_DELALLOC;
+	} else {
+		iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
+		if (imap->br_state == XFS_EXT_UNWRITTEN)
+			iomap->type = IOMAP_UNWRITTEN;
+		else
+			iomap->type = IOMAP_MAPPED;
+	}
+	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
+	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
+	iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
+}
+
+static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+{
+	return !nimaps ||
+		imap->br_startblock == HOLESTARTBLOCK ||
+		imap->br_startblock == DELAYSTARTBLOCK;
+}
+
+static int
+xfs_file_iomap_begin(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			nimaps = 1, error = 0;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	ASSERT(offset <= mp->m_super->s_maxbytes);
+	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+		length = mp->m_super->s_maxbytes - offset;
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+			       &nimaps, XFS_BMAPI_ENTIRE);
+	if (error) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		return error;
+	}
+
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+		/*
+		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+		 * pages to keep the chunks of work done where somewhat symmetric
+		 * with the work writeback does. This is a completely arbitrary
+		 * number pulled out of thin air as a best guess for initial
+		 * testing.
+		 *
+		 * Note that the values needs to be less than 32-bits wide until
+		 * the lower level functions are updated.
+		 */
+		length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+		if (xfs_get_extsz_hint(ip)) {
+			/*
+			 * xfs_iomap_write_direct() expects the shared lock. It
+			 * is unlocked on return.
+			 */
+			xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+			error = xfs_iomap_write_direct(ip, offset, length, &imap,
+					nimaps);
+		} else {
+			error = xfs_iomap_write_delay(ip, offset, length, &imap);
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		}
+
+		if (error)
+			return error;
+
+		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else if (nimaps) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+		iomap->offset = offset;
+		iomap->length = length;
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end_delalloc(
+	struct xfs_inode	*ip,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		end_fsb;
+	int			error = 0;
+
+	start_fsb = XFS_B_TO_FSB(mp, offset + written);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	/*
+	 * Trim back delalloc blocks if we didn't manage to write the whole
+	 * range reserved.
+	 *
+	 * We don't need to care about racing delalloc as we hold i_mutex
+	 * across the reserve/allocate/unreserve calls. If there are delalloc
+	 * blocks in the range, they are ours.
+	 */
+	if (start_fsb < end_fsb) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+					       end_fsb - start_fsb);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+			xfs_alert(mp, "%s: unable to clean up ino %lld",
+				__func__, ip->i_ino);
+			return error;
+		}
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+				length, written);
+	return 0;
+}
+
+struct iomap_ops xfs_iomap_ops = {
+	.iomap_begin		= xfs_file_iomap_begin,
+	.iomap_end		= xfs_file_iomap_end,
+};
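The xfs_iomap_ops table added above is consumed by the generic iomap helpers (iomap_zero_range, iomap_file_buffered_write, iomap_fiemap, ...), which repeatedly ask the filesystem to map a range, operate on the mapping, then let the filesystem clean up. The following is a minimal user-space model of that begin/actor/end cycle — an illustration of the contract, not the kernel's implementation; every type and name here is local to the example:

#include <stdio.h>

/* simplified stand-ins for the kernel structures */
struct iomap {
	long long offset;	/* file offset of the mapping */
	long long length;	/* length of the mapping */
	int delalloc;		/* stands in for IOMAP_DELALLOC */
};

struct iomap_ops {
	int (*iomap_begin)(long long pos, long long length, struct iomap *map);
	int (*iomap_end)(long long pos, long long length, long long written,
			 struct iomap *map);
};

static int demo_iomap_begin(long long pos, long long length, struct iomap *map)
{
	map->offset = pos;
	map->length = length < 4096 ? length : 4096;	/* map one block */
	map->delalloc = 1;	/* pretend we reserved delalloc space */
	return 0;
}

static int demo_iomap_end(long long pos, long long length, long long written,
			  struct iomap *map)
{
	/* mirrors xfs_file_iomap_end_delalloc(): trim unused reservation */
	if (map->delalloc && written < map->length)
		printf("punch delalloc after byte %lld\n", pos + written);
	return 0;
}

/* one begin -> actor -> end cycle, the loop the iomap helpers drive */
static long long apply_once(const struct iomap_ops *ops, long long pos,
			    long long length)
{
	struct iomap map;
	long long written;

	if (ops->iomap_begin(pos, length, &map) != 0)
		return -1;
	written = map.length / 2;	/* pretend a short write happened */
	ops->iomap_end(pos, length, written, &map);
	return written;
}

int main(void)
{
	const struct iomap_ops ops = { demo_iomap_begin, demo_iomap_end };

	printf("wrote %lld bytes\n", apply_once(&ops, 0, 8192));
	return 0;
}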
@@ -18,6 +18,8 @@
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
+#include <linux/iomap.h>
+
 struct xfs_inode;
 struct xfs_bmbt_irec;
 
@@ -29,4 +31,9 @@ int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
 			struct xfs_bmbt_irec *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
 
+void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
+		struct xfs_bmbt_irec *);
+
+extern struct iomap_ops xfs_iomap_ops;
+
 #endif /* __XFS_IOMAP_H__*/

@@ -38,12 +38,13 @@
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/security.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/slab.h>
 
 /*
@@ -800,21 +801,31 @@ xfs_setattr_size(
 	if (error)
 		return error;
 
-	/*
-	 * Wait for all direct I/O to complete.
-	 */
-	inode_dio_wait(inode);
-
 	/*
 	 * File data changes must be complete before we start the transaction to
 	 * modify the inode.  This needs to be done before joining the inode to
 	 * the transaction because the inode cannot be unlocked once it is a
 	 * part of the transaction.
 	 *
-	 * Start with zeroing any data block beyond EOF that we may expose on
-	 * file extension.
+	 * Start with zeroing any data beyond EOF that we may expose on file
+	 * extension, or zeroing out the rest of the block on a downward
+	 * truncate.
 	 */
 	if (newsize > oldsize) {
 		error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
-		if (error)
-			return error;
+	} else {
+		error = iomap_truncate_page(inode, newsize, &did_zeroing,
+				&xfs_iomap_ops);
 	}
 
+	if (error)
+		return error;
+
 	/*
 	 * We are going to log the inode size change in this transaction so
 	 * any previous writes that are beyond the on disk EOF and the new
@@ -823,17 +834,14 @@ xfs_setattr_size(
 	 * problem. Note that this includes any block zeroing we did above;
 	 * otherwise those blocks may not be zeroed after a crash.
 	 */
-	if (newsize > ip->i_d.di_size &&
-	    (oldsize != ip->i_d.di_size || did_zeroing)) {
+	if (did_zeroing ||
+	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
 		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
 						      ip->i_d.di_size, newsize);
 		if (error)
 			return error;
 	}
 
+	/* Now wait for all direct I/O to complete. */
+	inode_dio_wait(inode);
+
 	/*
 	 * We've already locked out new page faults, so now we can safely remove
 	 * pages from the page cache knowing they won't get refaulted until we
@@ -851,13 +859,6 @@ xfs_setattr_size(
 	 * to hope that the caller sees ENOMEM and retries the truncate
 	 * operation.
 	 */
-	if (IS_DAX(inode))
-		error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
-	else
-		error = block_truncate_page(inode->i_mapping, newsize,
-					    xfs_get_blocks);
-	if (error)
-		return error;
 	truncate_setsize(inode, newsize);
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -998,51 +999,6 @@ xfs_vn_update_time(
 	return xfs_trans_commit(tp);
 }
 
-#define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
-	void			**arg,
-	struct getbmapx		*bmv,
-	int			*full)
-{
-	int			error;
-	struct fiemap_extent_info *fieinfo = *arg;
-	u32			fiemap_flags = 0;
-	u64			logical, physical, length;
-
-	/* Do nothing for a hole */
-	if (bmv->bmv_block == -1LL)
-		return 0;
-
-	logical = BBTOB(bmv->bmv_offset);
-	physical = BBTOB(bmv->bmv_block);
-	length = BBTOB(bmv->bmv_length);
-
-	if (bmv->bmv_oflags & BMV_OF_PREALLOC)
-		fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
-	else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
-		fiemap_flags |= (FIEMAP_EXTENT_DELALLOC |
-				 FIEMAP_EXTENT_UNKNOWN);
-		physical = 0;	/* no block yet */
-	}
-	if (bmv->bmv_oflags & BMV_OF_LAST)
-		fiemap_flags |= FIEMAP_EXTENT_LAST;
-
-	error = fiemap_fill_next_extent(fieinfo, logical, physical,
-					length, fiemap_flags);
-	if (error > 0) {
-		error = 0;
-		*full = 1;	/* user array now full */
-	}
-
-	return error;
-}
-
 STATIC int
 xfs_vn_fiemap(
 	struct inode		*inode,
@@ -1050,38 +1006,13 @@ xfs_vn_fiemap(
 	u64			start,
 	u64			length)
 {
-	xfs_inode_t		*ip = XFS_I(inode);
-	struct getbmapx		bm;
 	int			error;
 
-	error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
-	if (error)
-		return error;
+	xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
+	error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+	xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
 
-	/* Set up bmap header for xfs internal routine */
-	bm.bmv_offset = BTOBBT(start);
-	/* Special case for whole file */
-	if (length == FIEMAP_MAX_OFFSET)
-		bm.bmv_length = -1LL;
-	else
-		bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
-
-	/* We add one because in getbmap world count includes the header */
-	bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
-					fieinfo->fi_extents_max + 1;
-	bm.bmv_count = min_t(__s32, bm.bmv_count,
-			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
-	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
-		bm.bmv_iflags |= BMV_IF_ATTRFORK;
-	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
-		bm.bmv_iflags |= BMV_IF_DELALLOC;
-
-	error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
-	if (error)
-		return error;
-
-	return 0;
+	return error;
 }
 
 STATIC int
@@ -80,32 +80,6 @@ xfs_fs_get_uuid(
 	return 0;
 }
 
-static void
-xfs_bmbt_to_iomap(
-	struct xfs_inode	*ip,
-	struct iomap		*iomap,
-	struct xfs_bmbt_irec	*imap)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-
-	if (imap->br_startblock == HOLESTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
-		iomap->type = IOMAP_HOLE;
-	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
-		iomap->type = IOMAP_DELALLOC;
-	} else {
-		iomap->blkno =
-			XFS_FSB_TO_DADDR(ip->i_mount, imap->br_startblock);
-		if (imap->br_state == XFS_EXT_UNWRITTEN)
-			iomap->type = IOMAP_UNWRITTEN;
-		else
-			iomap->type = IOMAP_MAPPED;
-	}
-	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
-	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
-}
-
 /*
  * Get a layout for the pNFS client.
  */
@@ -1295,6 +1295,9 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
+DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IOMAP_EVENT(xfs_iomap_found);
+DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),