Merge tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull iomap updates from Darrick Wong:
 "There's not a lot of new stuff going on here -- a little bit of code
  refactoring to make iomap workable with btrfs' fsync locking model,
  cleanups in preparation for adding THP support for filesystems, and
  fixing a data corruption issue for blocksize < pagesize filesystems.

  Summary:

   - Don't WARN_ON weird states that unprivileged users can create.

   - Don't invalidate page cache when direct writes want to fall back to
     buffered.

   - Fix some problems when readahead ios fail.

   - Fix a problem where inline data pages weren't getting flushed
     during an unshare operation.

   - Rework iomap to support arbitrarily many blocks per page in
     preparation to support THP for the page cache.

   - Fix a bug in the blocksize < pagesize buffered io path where we
     could fail to initialize the many-blocks-per-page uptodate bitmap
     correctly when the backing page is actually up to date. This could
     cause us to forget to write out dirty pages.

   - Split out the generic_write_sync at the end of the directio write
     path so that btrfs can drop the inode lock before sync'ing the
     file (see the sketch before the per-file diffs below).

   - Call inode_dio_end before trying to sync the file after an O_DSYNC
     direct write (instead of afterwards) to match the behavior of the
     old directio code"

* tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  iomap: Call inode_dio_end() before generic_write_sync()
  iomap: Allow filesystem to call iomap_dio_complete without i_rwsem
  iomap: Set all uptodate bits for an Uptodate page
  iomap: Change calling convention for zeroing
  iomap: Convert iomap_write_end types
  iomap: Convert write_count to write_bytes_pending
  iomap: Convert read_count to read_bytes_pending
  iomap: Support arbitrarily many blocks per page
  iomap: Use bitmap ops to set uptodate bits
  iomap: Use kzalloc to allocate iomap_page
  fs: Introduce i_blocks_per_page
  iomap: Fix misplaced page flushing
  iomap: Use round_down/round_up macros in __iomap_write_begin
  iomap: Mark read blocks uptodate in write_begin
  iomap: Clear page error before beginning a write
  iomap: Fix direct I/O write consistency check
  iomap: fix WARN_ON_ONCE() from unprivileged users
Linus Torvalds 2020-10-14 12:23:00 -07:00
commit 37187df45a
8 changed files with 150 additions and 134 deletions
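
The direct I/O refactoring is the piece other filesystems are most likely to consume: iomap_dio_rw() is now a thin wrapper that calls __iomap_dio_rw() and then iomap_dio_complete(), so a caller can run the two halves itself. Below is a minimal sketch of the intended usage (not actual btrfs code; my_dio_write, my_iomap_ops, and my_dio_ops are hypothetical placeholders), showing how a filesystem could drop the inode lock before the completion step, which is where the O_DSYNC generic_write_sync() now happens:

static ssize_t my_dio_write(struct kiocb *iocb, struct iov_iter *from)
{
        struct inode *inode = file_inode(iocb->ki_filp);
        struct iomap_dio *dio;

        inode_lock(inode);
        dio = __iomap_dio_rw(iocb, from, &my_iomap_ops, &my_dio_ops,
                        is_sync_kiocb(iocb));
        inode_unlock(inode);

        if (IS_ERR_OR_NULL(dio))
                return PTR_ERR_OR_ZERO(dio);
        /* i_rwsem is no longer held while the file is sync'd. */
        return iomap_dio_complete(dio);
}

__iomap_dio_rw() returns NULL for a zero-byte request, an ERR_PTR() for early failures (including -EIOCBQUEUED when the I/O will complete asynchronously), or a dio still to be completed, which is why the IS_ERR_OR_NULL()/PTR_ERR_OR_ZERO() pair above mirrors the in-tree iomap_dio_rw() wrapper shown in the fs/iomap/direct-io.c diff below.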

fs/dax.c

@@ -1037,18 +1037,18 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
 	return ret;
 }
 
-int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
-		struct iomap *iomap)
+s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
 {
 	sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
 	pgoff_t pgoff;
 	long rc, id;
 	void *kaddr;
 	bool page_aligned = false;
+	unsigned offset = offset_in_page(pos);
+	unsigned size = min_t(u64, PAGE_SIZE - offset, length);
 
 	if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
-	    IS_ALIGNED(size, PAGE_SIZE))
+	    (size == PAGE_SIZE))
 		page_aligned = true;
 
 	rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
@@ -1058,8 +1058,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
 	id = dax_read_lock();
 	if (page_aligned)
-		rc = dax_zero_page_range(iomap->dax_dev, pgoff,
-				size >> PAGE_SHIFT);
+		rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
 	else
 		rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
 	if (rc < 0) {
@@ -1072,7 +1071,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
 		dax_flush(iomap->dax_dev, kaddr + offset, size);
 	}
 	dax_read_unlock(id);
-	return 0;
+	return size;
 }
 
 static loff_t

fs/iomap/buffered-io.c

@@ -22,18 +22,25 @@
 #include "../internal.h"
 
 /*
- * Structure allocated for each page when block size < PAGE_SIZE to track
- * sub-page uptodate status and I/O completions.
+ * Structure allocated for each page or THP when block size < page size
+ * to track sub-page uptodate status and I/O completions.
  */
 struct iomap_page {
-	atomic_t		read_count;
-	atomic_t		write_count;
+	atomic_t		read_bytes_pending;
+	atomic_t		write_bytes_pending;
 	spinlock_t		uptodate_lock;
-	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
+	unsigned long		uptodate[];
 };
 
 static inline struct iomap_page *to_iomap_page(struct page *page)
 {
+	/*
+	 * per-block data is stored in the head page.  Callers should
+	 * not be dealing with tail pages (and if they are, they can
+	 * call thp_head() first.
+	 */
+	VM_BUG_ON_PGFLAGS(PageTail(page), page);
+
 	if (page_has_private(page))
 		return (struct iomap_page *)page_private(page);
 	return NULL;
@@ -45,20 +52,16 @@ static struct iomap_page *
 iomap_page_create(struct inode *inode, struct page *page)
 {
 	struct iomap_page *iop = to_iomap_page(page);
+	unsigned int nr_blocks = i_blocks_per_page(inode, page);
 
-	if (iop || i_blocksize(inode) == PAGE_SIZE)
+	if (iop || nr_blocks <= 1)
 		return iop;
 
-	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
-	atomic_set(&iop->read_count, 0);
-	atomic_set(&iop->write_count, 0);
+	iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
+			GFP_NOFS | __GFP_NOFAIL);
 	spin_lock_init(&iop->uptodate_lock);
-	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
-
-	/*
-	 * migrate_page_move_mapping() assumes that pages with private data have
-	 * their count elevated by 1.
-	 */
+	if (PageUptodate(page))
+		bitmap_fill(iop->uptodate, nr_blocks);
 	attach_page_private(page, iop);
 	return iop;
 }
@@ -67,11 +70,14 @@ static void
 iomap_page_release(struct page *page)
 {
 	struct iomap_page *iop = detach_page_private(page);
+	unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page);
 
 	if (!iop)
 		return;
-	WARN_ON_ONCE(atomic_read(&iop->read_count));
-	WARN_ON_ONCE(atomic_read(&iop->write_count));
+	WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
+	WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
+	WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
+			PageUptodate(page));
 	kfree(iop);
 }
@@ -142,19 +148,11 @@ iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
 	struct inode *inode = page->mapping->host;
 	unsigned first = off >> inode->i_blkbits;
 	unsigned last = (off + len - 1) >> inode->i_blkbits;
-	bool uptodate = true;
 	unsigned long flags;
-	unsigned int i;
 
 	spin_lock_irqsave(&iop->uptodate_lock, flags);
-	for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
-		if (i >= first && i <= last)
-			set_bit(i, iop->uptodate);
-		else if (!test_bit(i, iop->uptodate))
-			uptodate = false;
-	}
-
-	if (uptodate)
+	bitmap_set(iop->uptodate, first, last - first + 1);
+	if (bitmap_full(iop->uptodate, i_blocks_per_page(inode, page)))
 		SetPageUptodate(page);
 	spin_unlock_irqrestore(&iop->uptodate_lock, flags);
 }
@@ -171,13 +169,6 @@ iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
 		SetPageUptodate(page);
 }
 
-static void
-iomap_read_finish(struct iomap_page *iop, struct page *page)
-{
-	if (!iop || atomic_dec_and_test(&iop->read_count))
-		unlock_page(page);
-}
-
 static void
 iomap_read_page_end_io(struct bio_vec *bvec, int error)
 {
@@ -191,7 +182,8 @@ iomap_read_page_end_io(struct bio_vec *bvec, int error)
 		iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
 	}
 
-	iomap_read_finish(iop, page);
+	if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending))
+		unlock_page(page);
 }
 
 static void
@@ -271,30 +263,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	}
 	ctx->cur_page_in_bio = true;
+	if (iop)
+		atomic_add(plen, &iop->read_bytes_pending);
 
-	/*
-	 * Try to merge into a previous segment if we can.
-	 */
+	/* Try to merge into a previous segment if we can */
 	sector = iomap_sector(iomap, pos);
-	if (ctx->bio && bio_end_sector(ctx->bio) == sector)
+	if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
+		if (__bio_try_merge_page(ctx->bio, page, plen, poff,
+				&same_page))
+			goto done;
 		is_contig = true;
-
-	if (is_contig &&
-	    __bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) {
-		if (!same_page && iop)
-			atomic_inc(&iop->read_count);
-		goto done;
 	}
 
-	/*
-	 * If we start a new segment we need to increase the read count, and we
-	 * need to do so before submitting any previous full bio to make sure
-	 * that we don't prematurely unlock the page.
-	 */
-	if (iop)
-		atomic_inc(&iop->read_count);
-
-	if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) {
+	if (!is_contig || bio_full(ctx->bio, plen)) {
 		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
 		gfp_t orig_gfp = gfp;
 		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -571,13 +552,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 {
 	struct iomap_page *iop = iomap_page_create(inode, page);
 	loff_t block_size = i_blocksize(inode);
-	loff_t block_start = pos & ~(block_size - 1);
-	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+	loff_t block_start = round_down(pos, block_size);
+	loff_t block_end = round_up(pos + len, block_size);
 	unsigned from = offset_in_page(pos), to = from + len, poff, plen;
-	int status;
 
 	if (PageUptodate(page))
 		return 0;
+	ClearPageError(page);
 
 	do {
 		iomap_adjust_read_range(inode, iop, &block_start,
@@ -594,14 +575,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 			if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
 				return -EIO;
 			zero_user_segments(page, poff, from, to, poff + plen);
-			iomap_set_range_uptodate(page, poff, plen);
-			continue;
+		} else {
+			int status = iomap_read_page_sync(block_start, page,
+					poff, plen, srcmap);
+			if (status)
+				return status;
 		}
-
-		status = iomap_read_page_sync(block_start, page, poff, plen,
-				srcmap);
-		if (status)
-			return status;
+		iomap_set_range_uptodate(page, poff, plen);
 	} while ((block_start += plen) < block_end);
 
 	return 0;
@@ -685,9 +665,8 @@ iomap_set_page_dirty(struct page *page)
 }
 EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
 
-static int
-__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-		unsigned copied, struct page *page)
+static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
+		size_t copied, struct page *page)
 {
 	flush_dcache_page(page);
@@ -709,15 +688,15 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 	return copied;
 }
 
-static int
-iomap_write_end_inline(struct inode *inode, struct page *page,
-		struct iomap *iomap, loff_t pos, unsigned copied)
+static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
+		struct iomap *iomap, loff_t pos, size_t copied)
 {
 	void *addr;
 
 	WARN_ON_ONCE(!PageUptodate(page));
 	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
 
+	flush_dcache_page(page);
 	addr = kmap_atomic(page);
 	memcpy(iomap->inline_data + pos, addr + pos, copied);
 	kunmap_atomic(addr);
@@ -726,13 +705,14 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
 	return copied;
 }
 
-static int
-iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
-		struct page *page, struct iomap *iomap, struct iomap *srcmap)
+/* Returns the number of bytes copied.  May be 0.  Cannot be an errno. */
+static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
+		size_t copied, struct page *page, struct iomap *iomap,
+		struct iomap *srcmap)
 {
 	const struct iomap_page_ops *page_ops = iomap->page_ops;
 	loff_t old_size = inode->i_size;
-	int ret;
+	size_t ret;
 
 	if (srcmap->type == IOMAP_INLINE) {
 		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
@@ -811,13 +791,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 
-		flush_dcache_page(page);
-
-		status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
+		copied = iomap_write_end(inode, pos, bytes, copied, page, iomap,
 				srcmap);
-		if (unlikely(status < 0))
-			break;
-		copied = status;
 
 		cond_resched();
@@ -891,11 +866,8 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
 				srcmap);
-		if (unlikely(status <= 0)) {
-			if (WARN_ON_ONCE(status == 0))
-				return -EIO;
-			return status;
-		}
+		if (WARN_ON_ONCE(status == 0))
+			return -EIO;
 
 		cond_resched();
@@ -928,11 +900,13 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 }
 EXPORT_SYMBOL_GPL(iomap_file_unshare);
 
-static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
-		unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
+static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct page *page;
 	int status;
+	unsigned offset = offset_in_page(pos);
+	unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
 
 	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
 	if (status)
@@ -944,38 +918,33 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
 	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
 }
 
-static loff_t
-iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
-		void *data, struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos,
		loff_t length, void *data, struct iomap *iomap,
+		struct iomap *srcmap)
 {
 	bool *did_zero = data;
 	loff_t written = 0;
-	int status;
 
 	/* already zeroed?  we're done. */
 	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
-		return count;
+		return length;
 
 	do {
-		unsigned offset, bytes;
-
-		offset = offset_in_page(pos);
-		bytes = min_t(loff_t, PAGE_SIZE - offset, count);
+		s64 bytes;
 
 		if (IS_DAX(inode))
-			status = dax_iomap_zero(pos, offset, bytes, iomap);
+			bytes = dax_iomap_zero(pos, length, iomap);
 		else
-			status = iomap_zero(inode, pos, offset, bytes, iomap,
-					srcmap);
-		if (status < 0)
-			return status;
+			bytes = iomap_zero(inode, pos, length, iomap, srcmap);
+		if (bytes < 0)
+			return bytes;
 
 		pos += bytes;
-		count -= bytes;
+		length -= bytes;
 		written += bytes;
 		if (did_zero)
 			*did_zero = true;
-	} while (count > 0);
+	} while (length > 0);
 
 	return written;
 }
@@ -1070,7 +1039,7 @@ EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
 static void
 iomap_finish_page_writeback(struct inode *inode, struct page *page,
-		int error)
+		int error, unsigned int len)
 {
 	struct iomap_page *iop = to_iomap_page(page);
@@ -1079,10 +1048,10 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page,
 		mapping_set_error(inode->i_mapping, -EIO);
 	}
 
-	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
-	WARN_ON_ONCE(iop && atomic_read(&iop->write_count) <= 0);
+	WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
+	WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) <= 0);
 
-	if (!iop || atomic_dec_and_test(&iop->write_count))
+	if (!iop || atomic_sub_and_test(len, &iop->write_bytes_pending))
 		end_page_writeback(page);
 }
@@ -1116,7 +1085,8 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
 		/* walk each page on bio, ending page IO on them */
 		bio_for_each_segment_all(bv, bio, iter_all)
-			iomap_finish_page_writeback(inode, bv->bv_page, error);
+			iomap_finish_page_writeback(inode, bv->bv_page, error,
+					bv->bv_len);
 		bio_put(bio);
 	}
 	/* The ioend has been freed by bio_put() */
@@ -1332,8 +1302,8 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
 	merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
 			&same_page);
-	if (iop && !same_page)
-		atomic_inc(&iop->write_count);
+	if (iop)
+		atomic_add(len, &iop->write_bytes_pending);
 
 	if (!merged) {
 		if (bio_full(wpc->ioend->io_bio, len)) {
@@ -1375,8 +1345,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	int error = 0, count = 0, i;
 	LIST_HEAD(submit_list);
 
-	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
-	WARN_ON_ONCE(iop && atomic_read(&iop->write_count) != 0);
+	WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
+	WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
 
 	/*
 	 * Walk through the page to find areas to write back. If we run off the
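
The buffered-io changes above replace the per-segment read_count/write_count with byte counters: submission adds each segment's length to read_bytes_pending or write_bytes_pending, each bio segment completion subtracts its bv_len, and the page is unlocked (or its writeback ended) exactly when the counter returns to zero, regardless of how segments were merged or split. A minimal userspace model of that completion rule (an illustration only, not kernel code):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int read_bytes_pending;

static void submit_segment(int len)
{
        atomic_fetch_add(&read_bytes_pending, len);
}

static void complete_segment(int len)
{
        /* analogue of atomic_sub_and_test(): true when the counter hits zero */
        if (atomic_fetch_sub(&read_bytes_pending, len) - len == 0)
                printf("unlock_page()\n");
}

int main(void)
{
        submit_segment(2048);   /* two 1 KiB blocks merged into one segment */
        submit_segment(1024);   /* a third block read by a separate bio */
        complete_segment(1024);
        complete_segment(2048); /* counter reaches zero: page unlocked */
        return 0;
}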

fs/iomap/direct-io.c

@@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
 	dio->submit.cookie = submit_bio(bio);
 }
 
-static ssize_t iomap_dio_complete(struct iomap_dio *dio)
+ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	const struct iomap_dio_ops *dops = dio->dops;
 	struct kiocb *iocb = dio->iocb;
@@ -108,7 +108,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	 * ->end_io() when necessary, otherwise a racing buffer read would cache
 	 * zeros from unwritten extents.
 	 */
-	if (!dio->error &&
+	if (!dio->error && dio->size &&
 	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
 		int err;
 		err = invalidate_inode_pages2_range(inode->i_mapping,
@@ -118,6 +118,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 			dio_warn_stale_pagecache(iocb->ki_filp);
 	}
 
+	inode_dio_end(file_inode(iocb->ki_filp));
 	/*
 	 * If this is a DSYNC write, make sure we push it to stable storage now
 	 * that we've written data.
@@ -125,11 +126,11 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
 		ret = generic_write_sync(iocb, ret);
 
-	inode_dio_end(file_inode(iocb->ki_filp));
 	kfree(dio);
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(iomap_dio_complete);
 
 static void iomap_dio_complete_work(struct work_struct *work)
 {
@@ -388,6 +389,16 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 		return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
 	case IOMAP_INLINE:
 		return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
+	case IOMAP_DELALLOC:
+		/*
+		 * DIO is not serialised against mmap() access at all, and so
+		 * if the page_mkwrite occurs between the writeback and the
+		 * iomap_apply() call in the DIO path, then it will see the
+		 * DELALLOC block that the page-mkwrite allocated.
+		 */
+		pr_warn_ratelimited("Direct I/O collision with buffered writes! File: %pD4 Comm: %.20s\n",
+				    dio->iocb->ki_filp, current->comm);
+		return -EIO;
 	default:
 		WARN_ON_ONCE(1);
 		return -EIO;
@@ -406,8 +417,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
  * Returns -ENOTBLK In case of a page invalidation invalidation failure for
  * writes.  The callers needs to fall back to buffered I/O in this case.
  */
-ssize_t
-iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+struct iomap_dio *
+__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		bool wait_for_completion)
 {
@@ -421,14 +432,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	struct iomap_dio *dio;
 
 	if (!count)
-		return 0;
+		return NULL;
 
 	if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
-		return -EIO;
+		return ERR_PTR(-EIO);
 
 	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
 	if (!dio)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	dio->iocb = iocb;
 	atomic_set(&dio->ref, 1);
@@ -558,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->wait_for_completion = wait_for_completion;
 	if (!atomic_dec_and_test(&dio->ref)) {
 		if (!wait_for_completion)
-			return -EIOCBQUEUED;
+			return ERR_PTR(-EIOCBQUEUED);
 
 		for (;;) {
 			set_current_state(TASK_UNINTERRUPTIBLE);
@@ -574,10 +585,26 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		__set_current_state(TASK_RUNNING);
 	}
 
-	return iomap_dio_complete(dio);
+	return dio;
 
 out_free_dio:
 	kfree(dio);
-	return ret;
+	if (ret)
+		return ERR_PTR(ret);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__iomap_dio_rw);
+
+ssize_t
+iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+		bool wait_for_completion)
+{
+	struct iomap_dio *dio;
+
+	dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion);
+	if (IS_ERR_OR_NULL(dio))
+		return PTR_ERR_OR_ZERO(dio);
+	return iomap_dio_complete(dio);
 }
 EXPORT_SYMBOL_GPL(iomap_dio_rw);

fs/jfs/jfs_metapage.c

@@ -473,7 +473,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
 	struct inode *inode = page->mapping->host;
 	struct bio *bio = NULL;
 	int block_offset;
-	int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+	int blocks_per_page = i_blocks_per_page(inode, page);
 	sector_t page_start;	/* address of page in fs blocks */
 	sector_t pblock;
 	int xlen;

fs/xfs/xfs_aops.c

@@ -544,7 +544,7 @@ xfs_discard_page(
 			page, ip->i_ino, offset);
 
 	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-			PAGE_SIZE / i_blocksize(inode));
+			i_blocks_per_page(inode, page));
 	if (error && !XFS_FORCED_SHUTDOWN(mp))
 		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
 out_invalidate:

include/linux/dax.h

@@ -231,8 +231,7 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index);
-int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
-			struct iomap *iomap);
+s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap);
 static inline bool dax_mapping(struct address_space *mapping)
 {
 	return mapping->host && IS_DAX(mapping->host);

include/linux/iomap.h

@@ -13,6 +13,7 @@
 struct address_space;
 struct fiemap_extent_info;
 struct inode;
+struct iomap_dio;
 struct iomap_writepage_ctx;
 struct iov_iter;
 struct kiocb;
@@ -258,6 +259,10 @@ struct iomap_dio_ops {
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		bool wait_for_completion);
+struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+		bool wait_for_completion);
+ssize_t iomap_dio_complete(struct iomap_dio *dio);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
 #ifdef CONFIG_SWAP

include/linux/pagemap.h

@@ -927,4 +927,20 @@ static inline int page_mkwrite_check_truncate(struct page *page,
 	return offset;
 }
 
+/**
+ * i_blocks_per_page - How many blocks fit in this page.
+ * @inode: The inode which contains the blocks.
+ * @page: The page (head page if the page is a THP).
+ *
+ * If the block size is larger than the size of this page, return zero.
+ *
+ * Context: The caller should hold a refcount on the page to prevent it
+ * from being split.
+ * Return: The number of filesystem blocks covered by this page.
+ */
+static inline
+unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
+{
+	return thp_size(page) >> inode->i_blkbits;
+}
 #endif /* _LINUX_PAGEMAP_H */
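
To make the bitmap sizing concrete, here is the arithmetic that i_blocks_per_page() and the flexible uptodate[] array imply, worked in userspace (an illustration only; blocks_per_page() stands in for thp_size(page) >> inode->i_blkbits):

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* page_size stands in for thp_size(page); blkbits for inode->i_blkbits */
static unsigned int blocks_per_page(unsigned long page_size,
                unsigned int blkbits)
{
        return page_size >> blkbits;
}

int main(void)
{
        /* 4 KiB page, 1 KiB blocks: 4 bits, one long is plenty */
        unsigned int small = blocks_per_page(4096, 10);
        /* 2 MiB THP, 4 KiB blocks: 512 bits, 8 longs on 64-bit */
        unsigned int thp = blocks_per_page(2UL << 20, 12);

        printf("%u blocks -> %zu long(s)\n", small, BITS_TO_LONGS(small));
        printf("%u blocks -> %zu long(s)\n", thp, BITS_TO_LONGS(thp));
        return 0;
}

The old DECLARE_BITMAP(uptodate, PAGE_SIZE / 512) sized the bitmap at compile time for 512-byte blocks in a single page; allocating BITS_TO_LONGS(nr_blocks) longs in iomap_page_create() instead lets the same structure scale up to cover a THP's worth of blocks, which is exactly the preparation the merge summary describes.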