2009-04-07 10:01:33 +08:00
|
|
|
/*
|
|
|
|
* file.c - NILFS regular file handling primitives including fsync().
|
|
|
|
*
|
|
|
|
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*
|
|
|
|
* Written by Amagai Yoshiji <amagai@osrg.net>,
|
|
|
|
* Ryusuke Konishi <ryusuke@osrg.net>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/writeback.h>
|
|
|
|
#include "nilfs.h"
|
|
|
|
#include "segment.h"
|
|
|
|
|
2011-07-17 08:44:56 +08:00
|
|
|
/*
 * nilfs_sync_file - fsync() handler for NILFS regular files
 * @file: open file being synchronized
 * @start: first byte offset of the range, forwarded to the data-sync path
 * @end: last byte offset of the range, forwarded to the data-sync path
 * @datasync: non-zero to use the data-sync segment constructor
 *
 * Called from the fsync() system call.  If the inode is dirty, a segment is
 * constructed first: the data-sync variant (restricted to @start..@end) when
 * @datasync is set, the whole-filesystem variant otherwise.  The device is
 * flushed afterwards, but only if segment construction did not fail.
 *
 * Returns 0 on success, or the first error reported by segment construction
 * or by the device flush.
 */
int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct super_block *sb = inode->i_sb;
	int err;

	if (nilfs_inode_dirty(inode)) {
		err = datasync ?
			nilfs_construct_dsync_segment(sb, inode, start, end) :
			nilfs_construct_segment(sb);
		if (err)
			return err;	/* skip the flush when writing failed */
	}

	/* A clean inode still gets the device flush. */
	return nilfs_flush_device(sb->s_fs_info);
}
|
|
|
|
|
2009-04-07 10:01:37 +08:00
|
|
|
static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
2009-04-07 10:01:33 +08:00
|
|
|
{
|
2009-04-07 10:01:37 +08:00
|
|
|
struct page *page = vmf->page;
|
2013-01-24 06:07:38 +08:00
|
|
|
struct inode *inode = file_inode(vma->vm_file);
|
2009-04-07 10:01:37 +08:00
|
|
|
struct nilfs_transaction_info ti;
|
2012-06-12 22:20:44 +08:00
|
|
|
int ret = 0;
|
2009-04-07 10:01:37 +08:00
|
|
|
|
2011-03-09 10:05:08 +08:00
|
|
|
if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
|
2009-04-07 10:01:37 +08:00
|
|
|
return VM_FAULT_SIGBUS; /* -ENOSPC */
|
|
|
|
|
2012-06-12 22:20:44 +08:00
|
|
|
sb_start_pagefault(inode->i_sb);
|
2009-04-07 10:01:37 +08:00
|
|
|
lock_page(page);
|
|
|
|
if (page->mapping != inode->i_mapping ||
|
|
|
|
page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
|
|
|
|
unlock_page(page);
|
2012-06-12 22:20:44 +08:00
|
|
|
ret = -EFAULT; /* make the VM retry the fault */
|
|
|
|
goto out;
|
2009-04-07 10:01:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* check to see if the page is mapped already (no holes)
|
|
|
|
*/
|
2011-03-27 21:50:49 +08:00
|
|
|
if (PageMappedToDisk(page))
|
2009-04-07 10:01:37 +08:00
|
|
|
goto mapped;
|
2011-03-27 21:50:49 +08:00
|
|
|
|
2009-04-07 10:01:37 +08:00
|
|
|
if (page_has_buffers(page)) {
|
|
|
|
struct buffer_head *bh, *head;
|
|
|
|
int fully_mapped = 1;
|
|
|
|
|
|
|
|
bh = head = page_buffers(page);
|
|
|
|
do {
|
|
|
|
if (!buffer_mapped(bh)) {
|
|
|
|
fully_mapped = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} while (bh = bh->b_this_page, bh != head);
|
|
|
|
|
|
|
|
if (fully_mapped) {
|
|
|
|
SetPageMappedToDisk(page);
|
|
|
|
goto mapped;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
unlock_page(page);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* fill hole blocks
|
|
|
|
*/
|
|
|
|
ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
|
|
|
|
/* never returns -ENOMEM, but may return -ENOSPC */
|
|
|
|
if (unlikely(ret))
|
2012-06-12 22:20:44 +08:00
|
|
|
goto out;
|
2009-04-07 10:01:37 +08:00
|
|
|
|
2012-10-01 11:04:56 +08:00
|
|
|
file_update_time(vma->vm_file);
|
2012-06-12 22:20:44 +08:00
|
|
|
ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
|
|
|
|
if (ret) {
|
2009-04-07 10:01:37 +08:00
|
|
|
nilfs_transaction_abort(inode->i_sb);
|
2012-06-12 22:20:44 +08:00
|
|
|
goto out;
|
2009-04-07 10:01:37 +08:00
|
|
|
}
|
2011-03-27 21:50:49 +08:00
|
|
|
nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
|
2009-04-07 10:01:37 +08:00
|
|
|
nilfs_transaction_commit(inode->i_sb);
|
|
|
|
|
|
|
|
mapped:
|
mm: only enforce stable page writes if the backing device requires it
Create a helper function to check if a backing device requires stable
page writes and, if so, performs the necessary wait. Then, make it so
that all points in the memory manager that handle making pages writable
use the helper function. This should provide stable page write support
to most filesystems, while eliminating unnecessary waiting for devices
that don't require the feature.
Before this patchset, all filesystems would block, regardless of whether
or not it was necessary. ext3 would wait, but still generate occasional
checksum errors. The network filesystems were left to do their own
thing, so they'd wait too.
After this patchset, all the disk filesystems except ext3 and btrfs will
wait only if the hardware requires it. ext3 (if necessary) snapshots
pages instead of blocking, and btrfs provides its own bdi so the mm will
never wait. Network filesystems haven't been touched, so either they
provide their own stable page guarantees or they don't block at all.
The blocking behavior is back to what it was before 3.0 if you don't
have a disk requiring stable page writes.
Here's the result of using dbench to test latency on ext2:
3.8.0-rc3:
Operation Count AvgLat MaxLat
----------------------------------------
WriteX 109347 0.028 59.817
ReadX 347180 0.004 3.391
Flush 15514 29.828 287.283
Throughput 57.429 MB/sec 4 clients 4 procs max_latency=287.290 ms
3.8.0-rc3 + patches:
WriteX 105556 0.029 4.273
ReadX 335004 0.005 4.112
Flush 14982 30.540 298.634
Throughput 55.4496 MB/sec 4 clients 4 procs max_latency=298.650 ms
As you can see, the maximum write latency drops considerably with this
patch enabled. The other filesystems (ext3/ext4/xfs/btrfs) behave
similarly, but see the cover letter for those results.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-22 08:42:51 +08:00
|
|
|
wait_for_stable_page(page);
|
2012-06-12 22:20:44 +08:00
|
|
|
out:
|
|
|
|
sb_end_pagefault(inode->i_sb);
|
|
|
|
return block_page_mkwrite_return(ret);
|
2009-04-07 10:01:33 +08:00
|
|
|
}
|
|
|
|
|
2009-09-28 02:29:37 +08:00
|
|
|
static const struct vm_operations_struct nilfs_file_vm_ops = {
|
2009-04-07 10:01:33 +08:00
|
|
|
.fault = filemap_fault,
|
2014-04-08 06:37:19 +08:00
|
|
|
.map_pages = filemap_map_pages,
|
2009-04-07 10:01:33 +08:00
|
|
|
.page_mkwrite = nilfs_page_mkwrite,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
file_accessed(file);
|
|
|
|
vma->vm_ops = &nilfs_file_vm_ops;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We have mostly NULL's here: the current defaults are ok for
|
|
|
|
* the nilfs filesystem.
|
|
|
|
*/
|
2009-10-02 06:43:56 +08:00
|
|
|
const struct file_operations nilfs_file_operations = {
|
2009-04-07 10:01:33 +08:00
|
|
|
.llseek = generic_file_llseek,
|
2014-04-03 02:33:16 +08:00
|
|
|
.read = new_sync_read,
|
2014-04-03 15:17:43 +08:00
|
|
|
.write = new_sync_write,
|
2014-04-03 02:33:16 +08:00
|
|
|
.read_iter = generic_file_read_iter,
|
2014-04-03 15:17:43 +08:00
|
|
|
.write_iter = generic_file_write_iter,
|
2009-04-07 10:01:53 +08:00
|
|
|
.unlocked_ioctl = nilfs_ioctl,
|
2009-04-07 10:01:33 +08:00
|
|
|
#ifdef CONFIG_COMPAT
|
2011-02-03 20:26:17 +08:00
|
|
|
.compat_ioctl = nilfs_compat_ioctl,
|
2009-04-07 10:01:33 +08:00
|
|
|
#endif /* CONFIG_COMPAT */
|
|
|
|
.mmap = nilfs_file_mmap,
|
|
|
|
.open = generic_file_open,
|
|
|
|
/* .release = nilfs_release_file, */
|
|
|
|
.fsync = nilfs_sync_file,
|
|
|
|
.splice_read = generic_file_splice_read,
|
|
|
|
};
|
|
|
|
|
2009-09-22 08:01:11 +08:00
|
|
|
const struct inode_operations nilfs_file_inode_operations = {
|
2009-04-07 10:01:33 +08:00
|
|
|
.setattr = nilfs_setattr,
|
|
|
|
.permission = nilfs_permission,
|
2010-12-26 15:38:43 +08:00
|
|
|
.fiemap = nilfs_fiemap,
|
2009-04-07 10:01:33 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* end of file */
|