mirror of https://gitee.com/openkylin/linux.git
ext4: completed_io locking cleanup
Current unwritten extent conversion state-machine is very fuzzy. - For unknown reason it performs conversion under i_mutex. What for? My diagnosis: We already protect extent tree with i_data_sem, truncate and punch_hole should wait for DIO, so the only data we have to protect is end_io->flags modification, but only flush_completed_IO and end_io_work modified this flags and we can serialize them via i_completed_io_lock. Currently all these games with mutex_trylock result in the following deadlock truncate: kworker: ext4_setattr ext4_end_io_work mutex_lock(i_mutex) inode_dio_wait(inode) ->BLOCK DEADLOCK<- mutex_trylock() inode_dio_done() #TEST_CASE1_BEGIN MNT=/mnt_scrach unlink $MNT/file fallocate -l $((1024*1024*1024)) $MNT/file aio-stress -I 100000 -O -s 100m -n -t 1 -c 10 -o 2 -o 3 $MNT/file sleep 2 truncate -s 0 $MNT/file #TEST_CASE1_END Or use 286's xfstests https://github.com/dmonakhov/xfstests/blob/devel/286 This patch makes state machine simple and clean: (1) xxx_end_io schedule final extent conversion simply by calling ext4_add_complete_io(), which append it to ei->i_completed_io_list NOTE1: because of (2A) work should be queued only if ->i_completed_io_list was empty, otherwise the work is scheduled already. (2) ext4_flush_completed_IO is responsible for handling all pending end_io from ei->i_completed_io_list Flushing sequence consists of following stages: A) LOCKED: Atomically drain completed_io_list to local_list B) Perform extents conversion C) LOCKED: move converted io's to to_free list for final deletion This logic depends on context which we was called from. D) Final end_io context destruction NOTE1: i_mutex is no longer required because end_io->flags modification is protected by ei->ext4_complete_io_lock Full list of changes: - Move all completion end_io related routines to page-io.c in order to improve logic locality - Move open coded logic from various xx_end_xx routines to ext4_add_complete_io() - remove EXT4_IO_END_FSYNC - Improve SMP scalability by removing useless i_mutex which does not protect io->flags anymore. - Reduce lock contention on i_completed_io_lock by optimizing list walk. - Rename ext4_end_io_nolock to end4_end_io and make it static - Check flush completion status to ext4_ext_punch_hole(). Because it is not good idea to punch blocks from corrupted inode. Changes since V3 (in request to Jan's comments): Fall back to active flush_completed_IO() approach in order to prevent performance issues with nolocked DIO reads. Changes since V2: Fix use-after-free caused by race truncate vs end_io_work Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
82e5422911
commit
28a535f9a0
|
@ -186,7 +186,6 @@ struct mpage_da_data {
|
||||||
#define EXT4_IO_END_ERROR 0x0002
|
#define EXT4_IO_END_ERROR 0x0002
|
||||||
#define EXT4_IO_END_QUEUED 0x0004
|
#define EXT4_IO_END_QUEUED 0x0004
|
||||||
#define EXT4_IO_END_DIRECT 0x0008
|
#define EXT4_IO_END_DIRECT 0x0008
|
||||||
#define EXT4_IO_END_IN_FSYNC 0x0010
|
|
||||||
|
|
||||||
struct ext4_io_page {
|
struct ext4_io_page {
|
||||||
struct page *p_page;
|
struct page *p_page;
|
||||||
|
@ -2418,11 +2417,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
||||||
|
|
||||||
/* page-io.c */
|
/* page-io.c */
|
||||||
extern int __init ext4_init_pageio(void);
|
extern int __init ext4_init_pageio(void);
|
||||||
|
extern void ext4_add_complete_io(ext4_io_end_t *io_end);
|
||||||
extern void ext4_exit_pageio(void);
|
extern void ext4_exit_pageio(void);
|
||||||
extern void ext4_ioend_wait(struct inode *);
|
extern void ext4_ioend_wait(struct inode *);
|
||||||
extern void ext4_free_io_end(ext4_io_end_t *io);
|
extern void ext4_free_io_end(ext4_io_end_t *io);
|
||||||
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
|
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
|
||||||
extern int ext4_end_io_nolock(ext4_io_end_t *io);
|
|
||||||
extern void ext4_io_submit(struct ext4_io_submit *io);
|
extern void ext4_io_submit(struct ext4_io_submit *io);
|
||||||
extern int ext4_bio_write_page(struct ext4_io_submit *io,
|
extern int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||||
struct page *page,
|
struct page *page,
|
||||||
|
|
|
@ -4833,7 +4833,9 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* finish any pending end_io work */
|
/* finish any pending end_io work */
|
||||||
ext4_flush_completed_IO(inode);
|
err = ext4_flush_completed_IO(inode);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
credits = ext4_writepage_trans_blocks(inode);
|
credits = ext4_writepage_trans_blocks(inode);
|
||||||
handle = ext4_journal_start(inode, credits);
|
handle = ext4_journal_start(inode, credits);
|
||||||
|
|
|
@ -34,87 +34,6 @@
|
||||||
|
|
||||||
#include <trace/events/ext4.h>
|
#include <trace/events/ext4.h>
|
||||||
|
|
||||||
static void dump_completed_IO(struct inode * inode)
|
|
||||||
{
|
|
||||||
#ifdef EXT4FS_DEBUG
|
|
||||||
struct list_head *cur, *before, *after;
|
|
||||||
ext4_io_end_t *io, *io0, *io1;
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
|
|
||||||
ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
|
|
||||||
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
|
|
||||||
list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
|
|
||||||
cur = &io->list;
|
|
||||||
before = cur->prev;
|
|
||||||
io0 = container_of(before, ext4_io_end_t, list);
|
|
||||||
after = cur->next;
|
|
||||||
io1 = container_of(after, ext4_io_end_t, list);
|
|
||||||
|
|
||||||
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
|
|
||||||
io, inode->i_ino, io0, io1);
|
|
||||||
}
|
|
||||||
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This function is called from ext4_sync_file().
|
|
||||||
*
|
|
||||||
* When IO is completed, the work to convert unwritten extents to
|
|
||||||
* written is queued on workqueue but may not get immediately
|
|
||||||
* scheduled. When fsync is called, we need to ensure the
|
|
||||||
* conversion is complete before fsync returns.
|
|
||||||
* The inode keeps track of a list of pending/completed IO that
|
|
||||||
* might needs to do the conversion. This function walks through
|
|
||||||
* the list and convert the related unwritten extents for completed IO
|
|
||||||
* to written.
|
|
||||||
* The function return the number of pending IOs on success.
|
|
||||||
*/
|
|
||||||
int ext4_flush_completed_IO(struct inode *inode)
|
|
||||||
{
|
|
||||||
ext4_io_end_t *io;
|
|
||||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
|
||||||
unsigned long flags;
|
|
||||||
int ret = 0;
|
|
||||||
int ret2 = 0;
|
|
||||||
|
|
||||||
dump_completed_IO(inode);
|
|
||||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
|
||||||
while (!list_empty(&ei->i_completed_io_list)){
|
|
||||||
io = list_entry(ei->i_completed_io_list.next,
|
|
||||||
ext4_io_end_t, list);
|
|
||||||
list_del_init(&io->list);
|
|
||||||
io->flag |= EXT4_IO_END_IN_FSYNC;
|
|
||||||
/*
|
|
||||||
* Calling ext4_end_io_nolock() to convert completed
|
|
||||||
* IO to written.
|
|
||||||
*
|
|
||||||
* When ext4_sync_file() is called, run_queue() may already
|
|
||||||
* about to flush the work corresponding to this io structure.
|
|
||||||
* It will be upset if it founds the io structure related
|
|
||||||
* to the work-to-be schedule is freed.
|
|
||||||
*
|
|
||||||
* Thus we need to keep the io structure still valid here after
|
|
||||||
* conversion finished. The io structure has a flag to
|
|
||||||
* avoid double converting from both fsync and background work
|
|
||||||
* queue work.
|
|
||||||
*/
|
|
||||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
||||||
ret = ext4_end_io_nolock(io);
|
|
||||||
if (ret < 0)
|
|
||||||
ret2 = ret;
|
|
||||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
|
||||||
io->flag &= ~EXT4_IO_END_IN_FSYNC;
|
|
||||||
}
|
|
||||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
||||||
return (ret2 < 0) ? ret2 : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we're not journaling and this is a just-created file, we have to
|
* If we're not journaling and this is a just-created file, we have to
|
||||||
* sync our parent directory (if it was freshly created) since
|
* sync our parent directory (if it was freshly created) since
|
||||||
|
|
|
@ -807,11 +807,9 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
if (rw == READ && ext4_should_dioread_nolock(inode)) {
|
if (rw == READ && ext4_should_dioread_nolock(inode)) {
|
||||||
if (unlikely(!list_empty(&ei->i_completed_io_list))) {
|
if (unlikely(!list_empty(&ei->i_completed_io_list)))
|
||||||
mutex_lock(&inode->i_mutex);
|
|
||||||
ext4_flush_completed_IO(inode);
|
ext4_flush_completed_IO(inode);
|
||||||
mutex_unlock(&inode->i_mutex);
|
|
||||||
}
|
|
||||||
ret = __blockdev_direct_IO(rw, iocb, inode,
|
ret = __blockdev_direct_IO(rw, iocb, inode,
|
||||||
inode->i_sb->s_bdev, iov,
|
inode->i_sb->s_bdev, iov,
|
||||||
offset, nr_segs,
|
offset, nr_segs,
|
||||||
|
|
|
@ -2881,9 +2881,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
||||||
{
|
{
|
||||||
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
|
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
|
||||||
ext4_io_end_t *io_end = iocb->private;
|
ext4_io_end_t *io_end = iocb->private;
|
||||||
struct workqueue_struct *wq;
|
|
||||||
unsigned long flags;
|
|
||||||
struct ext4_inode_info *ei;
|
|
||||||
|
|
||||||
/* if not async direct IO or dio with 0 bytes write, just return */
|
/* if not async direct IO or dio with 0 bytes write, just return */
|
||||||
if (!io_end || !size)
|
if (!io_end || !size)
|
||||||
|
@ -2912,24 +2909,14 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
||||||
io_end->iocb = iocb;
|
io_end->iocb = iocb;
|
||||||
io_end->result = ret;
|
io_end->result = ret;
|
||||||
}
|
}
|
||||||
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
|
|
||||||
|
|
||||||
/* Add the io_end to per-inode completed aio dio list*/
|
ext4_add_complete_io(io_end);
|
||||||
ei = EXT4_I(io_end->inode);
|
|
||||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
|
||||||
list_add_tail(&io_end->list, &ei->i_completed_io_list);
|
|
||||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
||||||
|
|
||||||
/* queue the work to convert unwritten extents to written */
|
|
||||||
queue_work(wq, &io_end->work);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
|
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
|
||||||
{
|
{
|
||||||
ext4_io_end_t *io_end = bh->b_private;
|
ext4_io_end_t *io_end = bh->b_private;
|
||||||
struct workqueue_struct *wq;
|
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
if (!test_clear_buffer_uninit(bh) || !io_end)
|
if (!test_clear_buffer_uninit(bh) || !io_end)
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -2948,15 +2935,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
|
||||||
*/
|
*/
|
||||||
inode = io_end->inode;
|
inode = io_end->inode;
|
||||||
ext4_set_io_unwritten_flag(inode, io_end);
|
ext4_set_io_unwritten_flag(inode, io_end);
|
||||||
|
ext4_add_complete_io(io_end);
|
||||||
/* Add the io_end to per-inode completed io list*/
|
|
||||||
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
|
|
||||||
list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
|
|
||||||
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
|
|
||||||
|
|
||||||
wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
|
|
||||||
/* queue the work to convert unwritten extents to written */
|
|
||||||
queue_work(wq, &io_end->work);
|
|
||||||
out:
|
out:
|
||||||
bh->b_private = NULL;
|
bh->b_private = NULL;
|
||||||
bh->b_end_io = NULL;
|
bh->b_end_io = NULL;
|
||||||
|
|
|
@ -71,6 +71,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
BUG_ON(!io);
|
BUG_ON(!io);
|
||||||
|
BUG_ON(!list_empty(&io->list));
|
||||||
BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
|
BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
|
||||||
|
|
||||||
if (io->page)
|
if (io->page)
|
||||||
|
@ -83,21 +84,14 @@ void ext4_free_io_end(ext4_io_end_t *io)
|
||||||
kmem_cache_free(io_end_cachep, io);
|
kmem_cache_free(io_end_cachep, io);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* check a range of space and convert unwritten extents to written. */
|
||||||
* check a range of space and convert unwritten extents to written.
|
static int ext4_end_io(ext4_io_end_t *io)
|
||||||
*
|
|
||||||
* Called with inode->i_mutex; we depend on this when we manipulate
|
|
||||||
* io->flag, since we could otherwise race with ext4_flush_completed_IO()
|
|
||||||
*/
|
|
||||||
int ext4_end_io_nolock(ext4_io_end_t *io)
|
|
||||||
{
|
{
|
||||||
struct inode *inode = io->inode;
|
struct inode *inode = io->inode;
|
||||||
loff_t offset = io->offset;
|
loff_t offset = io->offset;
|
||||||
ssize_t size = io->size;
|
ssize_t size = io->size;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
|
|
||||||
|
|
||||||
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
|
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
|
||||||
"list->prev 0x%p\n",
|
"list->prev 0x%p\n",
|
||||||
io, inode->i_ino, io->list.next, io->list.prev);
|
io, inode->i_ino, io->list.next, io->list.prev);
|
||||||
|
@ -110,7 +104,6 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
|
||||||
"(inode %lu, offset %llu, size %zd, error %d)",
|
"(inode %lu, offset %llu, size %zd, error %d)",
|
||||||
inode->i_ino, offset, size, ret);
|
inode->i_ino, offset, size, ret);
|
||||||
}
|
}
|
||||||
io->flag &= ~EXT4_IO_END_UNWRITTEN;
|
|
||||||
if (io->iocb)
|
if (io->iocb)
|
||||||
aio_complete(io->iocb, io->result, 0);
|
aio_complete(io->iocb, io->result, 0);
|
||||||
|
|
||||||
|
@ -122,51 +115,122 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void dump_completed_IO(struct inode *inode)
|
||||||
|
{
|
||||||
|
#ifdef EXT4FS_DEBUG
|
||||||
|
struct list_head *cur, *before, *after;
|
||||||
|
ext4_io_end_t *io, *io0, *io1;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
|
||||||
|
ext4_debug("inode %lu completed_io list is empty\n",
|
||||||
|
inode->i_ino);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino);
|
||||||
|
list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) {
|
||||||
|
cur = &io->list;
|
||||||
|
before = cur->prev;
|
||||||
|
io0 = container_of(before, ext4_io_end_t, list);
|
||||||
|
after = cur->next;
|
||||||
|
io1 = container_of(after, ext4_io_end_t, list);
|
||||||
|
|
||||||
|
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
|
||||||
|
io, inode->i_ino, io0, io1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add the io_end to per-inode completed end_io list. */
|
||||||
|
void ext4_add_complete_io(ext4_io_end_t *io_end)
|
||||||
|
{
|
||||||
|
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
|
||||||
|
struct workqueue_struct *wq;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
|
||||||
|
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||||
|
if (list_empty(&ei->i_completed_io_list)) {
|
||||||
|
io_end->flag |= EXT4_IO_END_QUEUED;
|
||||||
|
queue_work(wq, &io_end->work);
|
||||||
|
}
|
||||||
|
list_add_tail(&io_end->list, &ei->i_completed_io_list);
|
||||||
|
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ext4_do_flush_completed_IO(struct inode *inode,
|
||||||
|
ext4_io_end_t *work_io)
|
||||||
|
{
|
||||||
|
ext4_io_end_t *io;
|
||||||
|
struct list_head unwritten, complete, to_free;
|
||||||
|
unsigned long flags;
|
||||||
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||||
|
int err, ret = 0;
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&complete);
|
||||||
|
INIT_LIST_HEAD(&to_free);
|
||||||
|
|
||||||
|
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||||
|
dump_completed_IO(inode);
|
||||||
|
list_replace_init(&ei->i_completed_io_list, &unwritten);
|
||||||
|
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||||
|
|
||||||
|
while (!list_empty(&unwritten)) {
|
||||||
|
io = list_entry(unwritten.next, ext4_io_end_t, list);
|
||||||
|
BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
|
||||||
|
list_del_init(&io->list);
|
||||||
|
|
||||||
|
err = ext4_end_io(io);
|
||||||
|
if (unlikely(!ret && err))
|
||||||
|
ret = err;
|
||||||
|
|
||||||
|
list_add_tail(&io->list, &complete);
|
||||||
|
}
|
||||||
|
/* It is important to update all flags for all end_io in one shot w/o
|
||||||
|
* dropping the lock.*/
|
||||||
|
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||||
|
while (!list_empty(&complete)) {
|
||||||
|
io = list_entry(complete.next, ext4_io_end_t, list);
|
||||||
|
io->flag &= ~EXT4_IO_END_UNWRITTEN;
|
||||||
|
/* end_io context can not be destroyed now because it still
|
||||||
|
* used by queued worker. Worker thread will destroy it later */
|
||||||
|
if (io->flag & EXT4_IO_END_QUEUED)
|
||||||
|
list_del_init(&io->list);
|
||||||
|
else
|
||||||
|
list_move(&io->list, &to_free);
|
||||||
|
}
|
||||||
|
/* If we are called from worker context, it is time to clear queued
|
||||||
|
* flag, and destroy it's end_io if it was converted already */
|
||||||
|
if (work_io) {
|
||||||
|
work_io->flag &= ~EXT4_IO_END_QUEUED;
|
||||||
|
if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
|
||||||
|
list_add_tail(&work_io->list, &to_free);
|
||||||
|
}
|
||||||
|
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||||
|
|
||||||
|
while (!list_empty(&to_free)) {
|
||||||
|
io = list_entry(to_free.next, ext4_io_end_t, list);
|
||||||
|
list_del_init(&io->list);
|
||||||
|
ext4_free_io_end(io);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* work on completed aio dio IO, to convert unwritten extents to extents
|
* work on completed aio dio IO, to convert unwritten extents to extents
|
||||||
*/
|
*/
|
||||||
static void ext4_end_io_work(struct work_struct *work)
|
static void ext4_end_io_work(struct work_struct *work)
|
||||||
{
|
{
|
||||||
ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
|
ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
|
||||||
struct inode *inode = io->inode;
|
ext4_do_flush_completed_IO(io->inode, io);
|
||||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
}
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
int ext4_flush_completed_IO(struct inode *inode)
|
||||||
if (io->flag & EXT4_IO_END_IN_FSYNC)
|
{
|
||||||
goto requeue;
|
return ext4_do_flush_completed_IO(inode, NULL);
|
||||||
if (list_empty(&io->list)) {
|
|
||||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
||||||
goto free;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!mutex_trylock(&inode->i_mutex)) {
|
|
||||||
bool was_queued;
|
|
||||||
requeue:
|
|
||||||
was_queued = !!(io->flag & EXT4_IO_END_QUEUED);
|
|
||||||
io->flag |= EXT4_IO_END_QUEUED;
|
|
||||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
||||||
/*
|
|
||||||
* Requeue the work instead of waiting so that the work
|
|
||||||
* items queued after this can be processed.
|
|
||||||
*/
|
|
||||||
queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work);
|
|
||||||
/*
|
|
||||||
* To prevent the ext4-dio-unwritten thread from keeping
|
|
||||||
* requeueing end_io requests and occupying cpu for too long,
|
|
||||||
* yield the cpu if it sees an end_io request that has already
|
|
||||||
* been requeued.
|
|
||||||
*/
|
|
||||||
if (was_queued)
|
|
||||||
yield();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
list_del_init(&io->list);
|
|
||||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
||||||
(void) ext4_end_io_nolock(io);
|
|
||||||
mutex_unlock(&inode->i_mutex);
|
|
||||||
free:
|
|
||||||
ext4_free_io_end(io);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
|
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
|
||||||
|
@ -199,9 +263,7 @@ static void buffer_io_error(struct buffer_head *bh)
|
||||||
static void ext4_end_bio(struct bio *bio, int error)
|
static void ext4_end_bio(struct bio *bio, int error)
|
||||||
{
|
{
|
||||||
ext4_io_end_t *io_end = bio->bi_private;
|
ext4_io_end_t *io_end = bio->bi_private;
|
||||||
struct workqueue_struct *wq;
|
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
unsigned long flags;
|
|
||||||
int i;
|
int i;
|
||||||
sector_t bi_sector = bio->bi_sector;
|
sector_t bi_sector = bio->bi_sector;
|
||||||
|
|
||||||
|
@ -259,14 +321,7 @@ static void ext4_end_bio(struct bio *bio, int error)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add the io_end to per-inode completed io list*/
|
ext4_add_complete_io(io_end);
|
||||||
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
|
|
||||||
list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
|
|
||||||
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
|
|
||||||
|
|
||||||
wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
|
|
||||||
/* queue the work to convert unwritten extents to written */
|
|
||||||
queue_work(wq, &io_end->work);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ext4_io_submit(struct ext4_io_submit *io)
|
void ext4_io_submit(struct ext4_io_submit *io)
|
||||||
|
|
Loading…
Reference in New Issue