mm/filemap.c: generic_file_buffered_read() now uses find_get_pages_contig
Convert generic_file_buffered_read() to get pages to read from in
batches, and then copy data to userspace from many pages at once - in
particular, we now don't touch any cachelines that might be contended
while we're in the loop to copy data to userspace.

This is a performance improvement on workloads that do buffered reads
with large blocksizes, and a very large performance improvement if that
file is also being accessed concurrently by different threads.

On smaller reads (512 bytes), there's a very small performance
improvement (1%, within the margin of error).

akpm: kernel test robot found a 32% speedup on one test:
https://lkml.kernel.org/r/20201030081456.GY31092@shao2-debian

Link: https://lkml.kernel.org/r/20201025212949.602194-3-kent.overstreet@gmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
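As a concrete illustration of the workload this helps, here is a minimal
userspace micro-benchmark sketch: sequential buffered reads with a large
blocksize. The file name and sizes are illustrative assumptions, not the
kernel test robot's actual test; with this patch, each 1 MiB read() can
copy from up to 256 page-cache pages in one batch instead of taking the
old page-at-a-time path.

#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	const size_t bufsz = 1 << 20;	/* large blocksize: 1 MiB per read() */
	char *buf = malloc(bufsz);
	int fd = open("testfile", O_RDONLY);	/* hypothetical test file */
	ssize_t n;

	if (fd < 0 || !buf)
		return 1;

	/* Drain the file through the page cache with big buffered reads. */
	while ((n = read(fd, buf, bufsz)) > 0)
		;

	close(fd);
	free(buf);
	return n < 0 ? 1 : 0;
}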
commit 06c0444290
parent 723ef24b9b

mm/filemap.c | 317
@@ -2176,67 +2176,6 @@ static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
 	return lock_page_killable(page);
 }
 
-static int generic_file_buffered_read_page_ok(struct kiocb *iocb,
-			struct iov_iter *iter,
-			struct page *page)
-{
-	struct address_space *mapping = iocb->ki_filp->f_mapping;
-	struct inode *inode = mapping->host;
-	struct file_ra_state *ra = &iocb->ki_filp->f_ra;
-	unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
-	unsigned int bytes, copied;
-	loff_t isize, end_offset;
-
-	BUG_ON(iocb->ki_pos >> PAGE_SHIFT != page->index);
-
-	/*
-	 * i_size must be checked after we know the page is Uptodate.
-	 *
-	 * Checking i_size after the check allows us to calculate
-	 * the correct value for "bytes", which means the zero-filled
-	 * part of the page is not copied back to userspace (unless
-	 * another truncate extends the file - this is desired though).
-	 */
-
-	isize = i_size_read(inode);
-	if (unlikely(iocb->ki_pos >= isize))
-		return 1;
-
-	end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
-
-	bytes = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);
-
-	/* If users can be writing to this page using arbitrary
-	 * virtual addresses, take care about potential aliasing
-	 * before reading the page on the kernel side.
-	 */
-	if (mapping_writably_mapped(mapping))
-		flush_dcache_page(page);
-
-	/*
-	 * Ok, we have the page, and it's up-to-date, so
-	 * now we can copy it to user space...
-	 */
-
-	copied = copy_page_to_iter(page, offset, bytes, iter);
-
-	iocb->ki_pos += copied;
-
-	/*
-	 * When a sequential read accesses a page several times,
-	 * only mark it as accessed the first time.
-	 */
-	if (iocb->ki_pos >> PAGE_SHIFT != ra->prev_pos >> PAGE_SHIFT)
-		mark_page_accessed(page);
-
-	ra->prev_pos = iocb->ki_pos;
-
-	if (copied < bytes)
-		return -EFAULT;
-
-	return !iov_iter_count(iter) || iocb->ki_pos == isize;
-}
-
 static struct page *
 generic_file_buffered_read_readpage(struct kiocb *iocb,
 				    struct file *filp,
@@ -2394,6 +2333,92 @@ generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
 	return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
 }
 
+static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
+						struct iov_iter *iter,
+						struct page **pages,
+						unsigned int nr)
+{
+	struct file *filp = iocb->ki_filp;
+	struct address_space *mapping = filp->f_mapping;
+	struct file_ra_state *ra = &filp->f_ra;
+	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
+	pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+	int i, j, nr_got, err = 0;
+
+	nr = min_t(unsigned long, last_index - index, nr);
+find_page:
+	if (fatal_signal_pending(current))
+		return -EINTR;
+
+	nr_got = find_get_pages_contig(mapping, index, nr, pages);
+	if (nr_got)
+		goto got_pages;
+
+	if (iocb->ki_flags & IOCB_NOIO)
+		return -EAGAIN;
+
+	page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
+
+	nr_got = find_get_pages_contig(mapping, index, nr, pages);
+	if (nr_got)
+		goto got_pages;
+
+	pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter);
+	err = PTR_ERR_OR_ZERO(pages[0]);
+	if (!IS_ERR_OR_NULL(pages[0]))
+		nr_got = 1;
+got_pages:
+	for (i = 0; i < nr_got; i++) {
+		struct page *page = pages[i];
+		pgoff_t pg_index = index + i;
+		loff_t pg_pos = max(iocb->ki_pos,
+				    (loff_t) pg_index << PAGE_SHIFT);
+		loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
+
+		if (PageReadahead(page)) {
+			if (iocb->ki_flags & IOCB_NOIO) {
+				for (j = i; j < nr_got; j++)
+					put_page(pages[j]);
+				nr_got = i;
+				err = -EAGAIN;
+				break;
+			}
+			page_cache_async_readahead(mapping, ra, filp, page,
+					pg_index, last_index - pg_index);
+		}
+
+		if (!PageUptodate(page)) {
+			if ((iocb->ki_flags & IOCB_NOWAIT) ||
+			    ((iocb->ki_flags & IOCB_WAITQ) && i)) {
+				for (j = i; j < nr_got; j++)
+					put_page(pages[j]);
+				nr_got = i;
+				err = -EAGAIN;
+				break;
+			}
+
+			page = generic_file_buffered_read_pagenotuptodate(iocb,
+					filp, iter, page, pg_pos, pg_count);
+			if (IS_ERR_OR_NULL(page)) {
+				for (j = i + 1; j < nr_got; j++)
+					put_page(pages[j]);
+				nr_got = i;
+				err = PTR_ERR_OR_ZERO(page);
+				break;
+			}
+		}
+	}
+
+	if (likely(nr_got))
+		return nr_got;
+	if (err)
+		return err;
+	/*
+	 * No pages and no error means we raced and should retry:
+	 */
+	goto find_page;
+}
+
 /**
  * generic_file_buffered_read - generic file read routine
  * @iocb:	the iocb to read
@@ -2414,104 +2439,116 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 		struct iov_iter *iter, ssize_t written)
 {
 	struct file *filp = iocb->ki_filp;
+	struct file_ra_state *ra = &filp->f_ra;
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
-	struct file_ra_state *ra = &filp->f_ra;
-	size_t orig_count = iov_iter_count(iter);
-	pgoff_t last_index;
-	int error = 0;
+	struct page *pages_onstack[PAGEVEC_SIZE], **pages = NULL;
+	unsigned int nr_pages = min_t(unsigned int, 512,
+			((iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT) -
+			(iocb->ki_pos >> PAGE_SHIFT));
+	int i, pg_nr, error = 0;
+	bool writably_mapped;
+	loff_t isize, end_offset;
 
 	if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
 		return 0;
 	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
 
-	last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+	if (nr_pages > ARRAY_SIZE(pages_onstack))
+		pages = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
 
-	/*
-	 * If we've already successfully copied some data, then we
-	 * can no longer safely return -EIOCBQUEUED. Hence mark
-	 * an async read NOWAIT at that point.
-	 */
-	if (written && (iocb->ki_flags & IOCB_WAITQ))
-		iocb->ki_flags |= IOCB_NOWAIT;
-
-	for (;;) {
-		pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
-		struct page *page;
+	if (!pages) {
+		pages = pages_onstack;
+		nr_pages = min_t(unsigned int, nr_pages, ARRAY_SIZE(pages_onstack));
+	}
 
+	do {
 		cond_resched();
-find_page:
-		if (fatal_signal_pending(current)) {
-			error = -EINTR;
-			goto out;
-		}
 
 		/*
-		 * We can't return -EIOCBQUEUED once we've done some work, so
-		 * ensure we don't block:
+		 * If we've already successfully copied some data, then we
+		 * can no longer safely return -EIOCBQUEUED. Hence mark
+		 * an async read NOWAIT at that point.
 		 */
-		if ((iocb->ki_flags & IOCB_WAITQ) &&
-		    (written + orig_count - iov_iter_count(iter)))
+		if ((iocb->ki_flags & IOCB_WAITQ) && written)
 			iocb->ki_flags |= IOCB_NOWAIT;
 
-		page = find_get_page(mapping, index);
-		if (!page) {
-			if (iocb->ki_flags & IOCB_NOIO)
-				goto would_block;
-			page_cache_sync_readahead(mapping,
-					ra, filp,
-					index, last_index - index);
-			page = find_get_page(mapping, index);
-			if (unlikely(page == NULL)) {
-				page = generic_file_buffered_read_no_cached_page(iocb, iter);
-				if (!page)
-					goto find_page;
-				if (IS_ERR(page)) {
-					error = PTR_ERR(page);
-					goto out;
-				}
-			}
-		}
-		if (PageReadahead(page)) {
-			if (iocb->ki_flags & IOCB_NOIO) {
-				put_page(page);
-				goto out;
-			}
-			page_cache_async_readahead(mapping,
-					ra, filp, page,
-					index, last_index - index);
-		}
-		if (!PageUptodate(page)) {
-			if (iocb->ki_flags & IOCB_NOWAIT) {
-				put_page(page);
-				error = -EAGAIN;
-				goto out;
-			}
-			page = generic_file_buffered_read_pagenotuptodate(iocb,
-					filp, iter, page, iocb->ki_pos, iter->count);
-			if (!page)
-				goto find_page;
-			if (IS_ERR(page)) {
-				error = PTR_ERR(page);
-				goto out;
-			}
+		i = 0;
+		pg_nr = generic_file_buffered_read_get_pages(iocb, iter,
+							     pages, nr_pages);
+		if (pg_nr < 0) {
+			error = pg_nr;
+			break;
 		}
 
-		error = generic_file_buffered_read_page_ok(iocb, iter, page);
-		put_page(page);
+		/*
+		 * i_size must be checked after we know the pages are Uptodate.
+		 *
+		 * Checking i_size after the check allows us to calculate
+		 * the correct value for "nr", which means the zero-filled
+		 * part of the page is not copied back to userspace (unless
+		 * another truncate extends the file - this is desired though).
+		 */
+		isize = i_size_read(inode);
+		if (unlikely(iocb->ki_pos >= isize))
+			goto put_pages;
 
-		if (error) {
-			if (error > 0)
-				error = 0;
-			goto out;
+		end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
+
+		while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr >
+		       (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
+			put_page(pages[--pg_nr]);
+
+		/*
+		 * Once we start copying data, we don't want to be touching any
+		 * cachelines that might be contended:
+		 */
+		writably_mapped = mapping_writably_mapped(mapping);
+
+		/*
+		 * When a sequential read accesses a page several times, only
+		 * mark it as accessed the first time.
+		 */
+		if (iocb->ki_pos >> PAGE_SHIFT !=
+		    ra->prev_pos >> PAGE_SHIFT)
+			mark_page_accessed(pages[0]);
+		for (i = 1; i < pg_nr; i++)
+			mark_page_accessed(pages[i]);
+
+		for (i = 0; i < pg_nr; i++) {
+			unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
+			unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos,
+						   PAGE_SIZE - offset);
+			unsigned int copied;
+
+			/*
+			 * If users can be writing to this page using arbitrary
+			 * virtual addresses, take care about potential aliasing
+			 * before reading the page on the kernel side.
+			 */
+			if (writably_mapped)
+				flush_dcache_page(pages[i]);
+
+			copied = copy_page_to_iter(pages[i], offset, bytes, iter);
+
+			written += copied;
+			iocb->ki_pos += copied;
+			ra->prev_pos = iocb->ki_pos;
+
+			if (copied < bytes) {
+				error = -EFAULT;
+				break;
+			}
 		}
-	}
+put_pages:
+		for (i = 0; i < pg_nr; i++)
+			put_page(pages[i]);
+	} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
 
-would_block:
-	error = -EAGAIN;
-out:
 	file_accessed(filp);
-	written += orig_count - iov_iter_count(iter);
+
+	if (pages != pages_onstack)
+		kfree(pages);
 
 	return written ? written : error;
 }
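Both hunks lean on find_get_pages_contig(), which fills a caller-supplied
array with up to nr consecutive pages starting at the given index, each
with an elevated refcount, stopping at the first hole in the page cache.
A minimal sketch of the calling convention follows; the helper itself is
hypothetical and not part of this patch:

/* Hypothetical illustration only: look up one contiguous batch of
 * cached pages, then drop the references the lookup took. */
static void peek_contig_batch(struct address_space *mapping, pgoff_t index)
{
	struct page *pages[16];
	unsigned int i, nr_got;

	nr_got = find_get_pages_contig(mapping, index, ARRAY_SIZE(pages), pages);

	for (i = 0; i < nr_got; i++)
		put_page(pages[i]);
}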